diff --git a/docs/documentation/gpuParallelization.md b/docs/documentation/gpuParallelization.md index f3d2089057..af9da2faf4 100644 --- a/docs/documentation/gpuParallelization.md +++ b/docs/documentation/gpuParallelization.md @@ -44,14 +44,16 @@ Note: Ordering is not guaranteed or stable, so use key-value pairing when using **Macro Invocation** -Uses FYPP eval directive using `#:call` +In order to parallelize a loop, simply place two macro calls on either end of the loop: ```C -#:call GPU_PARALLEL_LOOP(...) +$:$GPU_PARALLEL_LOOP(...) {code} -#:endcall GPU_PARALLEL_LOOP +$:END_GPU_PARALLEL_LOOP() ``` +This wraps the lines in `code` with parallelization calls to openACC or openMP, depending on environment and compiler settings. + **Parameters** | name | data type | Default Value | description | diff --git a/src/common/include/acc_macros.fpp b/src/common/include/acc_macros.fpp index 147473250d..771ee976db 100644 --- a/src/common/include/acc_macros.fpp +++ b/src/common/include/acc_macros.fpp @@ -129,7 +129,7 @@ $:end_acc_directive #:enddef -#:def ACC_PARALLEL_LOOP(code, collapse=None, private=None, parallelism='[gang, vector]', & +#:def ACC_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & & default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None) @@ -155,10 +155,7 @@ & deviceptr_val.strip('\n') + attach_val.strip('\n') #:set acc_directive = '!$acc parallel loop ' + & & clause_val + extraAccArgs_val.strip('\n') - #:set acc_end_directive = '!$acc end parallel loop' $:acc_directive - $:code - $:acc_end_directive #:enddef #:def ACC_ROUTINE(function_name=None, parallelism=None, nohost=False, extraAccArgs=None) @@ -308,4 +305,4 @@ #:set acc_directive = '!$acc wait ' + clause_val + extraAccArgs_val.strip('\n') $:acc_directive #:enddef -! New line at end of file is required for FYPP \ No newline at end of file +! New line at end of file is required for FYPP diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index 1c2fb9c985..9fb3abb9a0 100644 --- a/src/common/include/omp_macros.fpp +++ b/src/common/include/omp_macros.fpp @@ -149,7 +149,7 @@ $:omp_end_directive #:enddef -#:def OMP_PARALLEL_LOOP(code, collapse=None, private=None, parallelism='[gang, vector]', & +#:def OMP_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & & default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & & no_create=None, present=None, deviceptr=None, attach=None, extraOmpArgs=None) @@ -178,21 +178,30 @@ #:if MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID #:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) ' - #:set omp_end_directive = '!$omp end target teams loop' #:elif MFC_COMPILER == CCE_COMPILER_ID #:set omp_start_directive = '!$omp target teams distribute parallel do simd defaultmap(firstprivate:scalar) ' - #:set omp_end_directive = '!$omp end target teams distribute parallel do simd' #:elif MFC_COMPILER == AMD_COMPILER_ID #:set omp_start_directive = '!$omp target teams distribute parallel do ' - #:set omp_end_directive = '!$omp end target teams distribute parallel do' #:else #:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) ' - #:set omp_end_directive = '!$omp end target teams loop' #:endif #:set omp_directive = omp_start_directive + clause_val + extraOmpArgs_val.strip('\n') $:omp_directive - $:code +#:enddef + +#:def END_OMP_PARALLEL_LOOP() + + #:if MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID + #:set omp_end_directive = '!$omp end target teams loop' + #:elif MFC_COMPILER == CCE_COMPILER_ID + #:set omp_end_directive = '!$omp end target teams distribute parallel do simd' + #:elif MFC_COMPILER == AMD_COMPILER_ID + #:set omp_end_directive = '!$omp end target teams distribute parallel do' + #:else + #:set omp_end_directive = '!$omp end target teams loop' + #:endif + $:omp_end_directive #:enddef diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index 61bc30b431..bfe4b3beaf 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -19,21 +19,33 @@ #:enddef -#:def GPU_PARALLEL_LOOP(code, collapse=None, private=None, parallelism='[gang, vector]', & +#:def GPU_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & & default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None, extraOmpArgs=None) - #:set acc_code = ACC_PARALLEL_LOOP(code, collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraAccArgs) - #:set omp_code = OMP_PARALLEL_LOOP(code, collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs) + #:set acc_directive = ACC_PARALLEL_LOOP(collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraAccArgs) + #:set omp_directive = OMP_PARALLEL_LOOP(collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs) #if defined(MFC_OpenACC) - $:acc_code + $:acc_directive #elif defined(MFC_OpenMP) - $:omp_code -#else - $:code + $:omp_directive #endif + +#:enddef + +#:def END_GPU_PARALLEL_LOOP() + + #:set acc_end_directive = '!$acc end parallel loop' + #:set omp_end_directive = END_OMP_PARALLEL_LOOP() + +#if defined(MFC_OpenACC) + $:acc_end_directive +#elif defined(MFC_OpenMP) + $:omp_end_directive +#endif + #:enddef #:def GPU_ROUTINE(function_name=None, parallelism=None, nohost=False, cray_inline=False, extraAccArgs=None, extraOmpArgs=None) diff --git a/src/common/include/shared_parallel_macros.fpp b/src/common/include/shared_parallel_macros.fpp index 61134a3df3..a3a0b6f753 100644 --- a/src/common/include/shared_parallel_macros.fpp +++ b/src/common/include/shared_parallel_macros.fpp @@ -107,4 +107,4 @@ #:endif $:extraArgs_val #:enddef -! New line at end of file is required for FYPP \ No newline at end of file +! New line at end of file is required for FYPP diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 81d4bc8d65..892c325276 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -88,61 +88,61 @@ contains if (bc_x%beg >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, -1, sys_size, pb_in, mv_in) else - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do k = 0, n - select case (int(bc_type(1, -1)%sf(0, k, l))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, 1, -1, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 1, -1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 1, -1, k, l, pb_in, mv_in) - case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, 1, -1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 1, -1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 1, -1, k, l) - end select - - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(1, -1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then - call s_qbmm_extrapolation(1, -1, k, l, pb_in, mv_in) - end if - end do + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = 0, p + do k = 0, n + select case (int(bc_type(1, -1)%sf(0, k, l))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) + call s_ghost_cell_extrapolation(q_prim_vf, 1, -1, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 1, -1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 1, -1, k, l, pb_in, mv_in) + case (BC_SLIP_WALL) + call s_slip_wall(q_prim_vf, 1, -1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 1, -1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 1, -1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(1, -1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(1, -1, k, l, pb_in, mv_in) + end if end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (bc_x%end >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, 1, sys_size, pb_in, mv_in) else - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do k = 0, n - select case (int(bc_type(1, 1)%sf(0, k, l))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) ! Ghost-cell extrap. BC at end - call s_ghost_cell_extrapolation(q_prim_vf, 1, 1, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 1, 1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 1, 1, k, l, pb_in, mv_in) - case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, 1, 1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 1, 1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 1, 1, k, l) - end select - - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(1, 1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then - call s_qbmm_extrapolation(1, 1, k, l, pb_in, mv_in) - end if - end do + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = 0, p + do k = 0, n + select case (int(bc_type(1, 1)%sf(0, k, l))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) ! Ghost-cell extrap. BC at end + call s_ghost_cell_extrapolation(q_prim_vf, 1, 1, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 1, 1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 1, 1, k, l, pb_in, mv_in) + case (BC_SLIP_WALL) + call s_slip_wall(q_prim_vf, 1, 1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 1, 1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 1, 1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(1, 1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(1, 1, k, l, pb_in, mv_in) + end if end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if ! Population of Buffers in y-direction @@ -152,64 +152,64 @@ contains if (bc_y%beg >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, -1, sys_size, pb_in, mv_in) else - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do k = -buff_size, m + buff_size - select case (int(bc_type(2, -1)%sf(k, 0, l))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, 2, -1, k, l) - case (BC_AXIS) - call s_axis(q_prim_vf, pb_in, mv_in, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 2, -1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 2, -1, k, l, pb_in, mv_in) - case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, 2, -1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 2, -1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 2, -1, k, l) - end select - - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(2, -1)%sf(k, 0, l) <= BC_GHOST_EXTRAP) .and. & - (bc_type(2, -1)%sf(k, 0, l) /= BC_AXIS)) then - call s_qbmm_extrapolation(2, -1, k, l, pb_in, mv_in) - end if - end do + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = 0, p + do k = -buff_size, m + buff_size + select case (int(bc_type(2, -1)%sf(k, 0, l))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) + call s_ghost_cell_extrapolation(q_prim_vf, 2, -1, k, l) + case (BC_AXIS) + call s_axis(q_prim_vf, pb_in, mv_in, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 2, -1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 2, -1, k, l, pb_in, mv_in) + case (BC_SLIP_WALL) + call s_slip_wall(q_prim_vf, 2, -1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 2, -1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 2, -1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(2, -1)%sf(k, 0, l) <= BC_GHOST_EXTRAP) .and. & + (bc_type(2, -1)%sf(k, 0, l) /= BC_AXIS)) then + call s_qbmm_extrapolation(2, -1, k, l, pb_in, mv_in) + end if end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (bc_y%end >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, 1, sys_size, pb_in, mv_in) else - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do k = -buff_size, m + buff_size - select case (int(bc_type(2, 1)%sf(k, 0, l))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, 2, 1, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 2, 1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 2, 1, k, l, pb_in, mv_in) - case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, 2, 1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 2, 1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 2, 1, k, l) - end select - - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(2, 1)%sf(k, 0, l) <= BC_GHOST_EXTRAP)) then - call s_qbmm_extrapolation(2, 1, k, l, pb_in, mv_in) - end if - end do + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = 0, p + do k = -buff_size, m + buff_size + select case (int(bc_type(2, 1)%sf(k, 0, l))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) + call s_ghost_cell_extrapolation(q_prim_vf, 2, 1, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 2, 1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 2, 1, k, l, pb_in, mv_in) + case (BC_SLIP_WALL) + call s_slip_wall(q_prim_vf, 2, 1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 2, 1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 2, 1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(2, 1)%sf(k, 0, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(2, 1, k, l, pb_in, mv_in) + end if end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if ! Population of Buffers in z-direction @@ -219,61 +219,61 @@ contains if (bc_z%beg >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, -1, sys_size, pb_in, mv_in) else - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - select case (int(bc_type(3, -1)%sf(k, l, 0))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, 3, -1, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 3, -1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 3, -1, k, l, pb_in, mv_in) - case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, 3, -1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 3, -1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 3, -1, k, l) - end select - - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(3, -1)%sf(k, l, 0) <= BC_GHOST_EXTRAP)) then - call s_qbmm_extrapolation(3, -1, k, l, pb_in, mv_in) - end if - end do + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size + select case (int(bc_type(3, -1)%sf(k, l, 0))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) + call s_ghost_cell_extrapolation(q_prim_vf, 3, -1, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 3, -1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 3, -1, k, l, pb_in, mv_in) + case (BC_SLIP_WALL) + call s_slip_wall(q_prim_vf, 3, -1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 3, -1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 3, -1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(3, -1)%sf(k, l, 0) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(3, -1, k, l, pb_in, mv_in) + end if end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (bc_z%end >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, 1, sys_size, pb_in, mv_in) else - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - select case (int(bc_type(3, 1)%sf(k, l, 0))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, 3, 1, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 3, 1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 3, 1, k, l, pb_in, mv_in) - case (BC_SlIP_WALL) - call s_slip_wall(q_prim_vf, 3, 1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 3, 1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 3, 1, k, l) - end select - - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(3, 1)%sf(k, l, 0) <= BC_GHOST_EXTRAP)) then - call s_qbmm_extrapolation(3, 1, k, l, pb_in, mv_in) - end if - end do + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size + select case (int(bc_type(3, 1)%sf(k, l, 0))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) + call s_ghost_cell_extrapolation(q_prim_vf, 3, 1, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 3, 1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 3, 1, k, l, pb_in, mv_in) + case (BC_SlIP_WALL) + call s_slip_wall(q_prim_vf, 3, 1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 3, 1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 3, 1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(3, 1)%sf(k, l, 0) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(3, 1, k, l, pb_in, mv_in) + end if end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if ! END: Population of Buffers in z-direction @@ -1165,39 +1165,39 @@ contains if (bc_x%beg >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 1, -1, num_dims + 1) else - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do k = 0, n - select case (bc_type(1, -1)%sf(0, k, l)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 1, -1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 1, -1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 1, -1, k, l) - end select - end do - end do - #:endcall GPU_PARALLEL_LOOP + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = 0, p + do k = 0, n + select case (bc_type(1, -1)%sf(0, k, l)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 1, -1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 1, -1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 1, -1, k, l) + end select + end do + end do + $:END_GPU_PARALLEL_LOOP() end if if (bc_x%end >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 1, 1, num_dims + 1) else - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do k = 0, n - select case (bc_type(1, 1)%sf(0, k, l)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 1, 1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 1, 1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 1, 1, k, l) - end select - end do - end do - #:endcall GPU_PARALLEL_LOOP + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = 0, p + do k = 0, n + select case (bc_type(1, 1)%sf(0, k, l)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 1, 1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 1, 1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 1, 1, k, l) + end select + end do + end do + $:END_GPU_PARALLEL_LOOP() end if if (n == 0) return @@ -1206,39 +1206,39 @@ contains if (bc_y%beg >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 2, -1, num_dims + 1) else - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do k = -buff_size, m + buff_size - select case (bc_type(2, -1)%sf(k, 0, l)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 2, -1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 2, -1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 2, -1, k, l) - end select - end do - end do - #:endcall GPU_PARALLEL_LOOP + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = 0, p + do k = -buff_size, m + buff_size + select case (bc_type(2, -1)%sf(k, 0, l)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 2, -1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 2, -1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 2, -1, k, l) + end select + end do + end do + $:END_GPU_PARALLEL_LOOP() end if if (bc_y%end >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 2, 1, num_dims + 1) else - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do k = -buff_size, m + buff_size - select case (bc_type(2, 1)%sf(k, 0, l)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 2, 1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 2, 1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 2, 1, k, l) - end select - end do - end do - #:endcall GPU_PARALLEL_LOOP + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = 0, p + do k = -buff_size, m + buff_size + select case (bc_type(2, 1)%sf(k, 0, l)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 2, 1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 2, 1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 2, 1, k, l) + end select + end do + end do + $:END_GPU_PARALLEL_LOOP() end if if (p == 0) return @@ -1247,39 +1247,39 @@ contains if (bc_z%beg >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 3, -1, num_dims + 1) else - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - select case (bc_type(3, -1)%sf(k, l, 0)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 3, -1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 3, -1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 3, -1, k, l) - end select - end do - end do - #:endcall GPU_PARALLEL_LOOP + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size + select case (bc_type(3, -1)%sf(k, l, 0)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 3, -1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 3, -1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 3, -1, k, l) + end select + end do + end do + $:END_GPU_PARALLEL_LOOP() end if if (bc_z%end >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 3, 1, num_dims + 1) else - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - select case (bc_type(3, 1)%sf(k, l, 0)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 3, 1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 3, 1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 3, 1, k, l) - end select - end do - end do - #:endcall GPU_PARALLEL_LOOP + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size + select case (bc_type(3, 1)%sf(k, l, 0)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 3, 1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 3, 1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 3, 1, k, l) + end select + end do + end do + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_populate_capillary_buffers @@ -1482,52 +1482,52 @@ contains if (bc_x%beg >= 0) then call s_mpi_sendrecv_variables_buffers(jac_sf, 1, -1, 1) else - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do k = 0, n - select case (bc_type(1, -1)%sf(0, k, l)) - case (BC_PERIODIC) - do j = 1, buff_size - jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(m - j + 1, k, l) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(j - 1, k, l) - end do - case default - do j = 1, buff_size - jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(0, k, l) - end do - end select - end do + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = 0, p + do k = 0, n + select case (bc_type(1, -1)%sf(0, k, l)) + case (BC_PERIODIC) + do j = 1, buff_size + jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(m - j + 1, k, l) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(j - 1, k, l) + end do + case default + do j = 1, buff_size + jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(0, k, l) + end do + end select end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (bc_x%end >= 0) then call s_mpi_sendrecv_variables_buffers(jac_sf, 1, 1, 1) else - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do k = 0, n - select case (bc_type(1, 1)%sf(0, k, l)) - case (BC_PERIODIC) - do j = 1, buff_size - jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(j - 1, k, l) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(m - (j - 1), k, l) - end do - case default - do j = 1, buff_size - jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(m, k, l) - end do - end select - end do + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = 0, p + do k = 0, n + select case (bc_type(1, 1)%sf(0, k, l)) + case (BC_PERIODIC) + do j = 1, buff_size + jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(j - 1, k, l) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(m - (j - 1), k, l) + end do + case default + do j = 1, buff_size + jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(m, k, l) + end do + end select end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if @@ -1536,52 +1536,52 @@ contains else if (bc_y%beg >= 0) then call s_mpi_sendrecv_variables_buffers(jac_sf, 2, -1, 1) else - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do k = idwbuff(1)%beg, idwbuff(1)%end - select case (bc_type(2, -1)%sf(k, 0, l)) - case (BC_PERIODIC) - do j = 1, buff_size - jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, n - j + 1, l) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, j - 1, l) - end do - case default - do j = 1, buff_size - jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, 0, l) - end do - end select - end do + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = 0, p + do k = idwbuff(1)%beg, idwbuff(1)%end + select case (bc_type(2, -1)%sf(k, 0, l)) + case (BC_PERIODIC) + do j = 1, buff_size + jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, n - j + 1, l) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, j - 1, l) + end do + case default + do j = 1, buff_size + jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, 0, l) + end do + end select end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (bc_y%end >= 0) then call s_mpi_sendrecv_variables_buffers(jac_sf, 2, 1, 1) else - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do k = idwbuff(1)%beg, idwbuff(1)%end - select case (bc_type(2, 1)%sf(k, 0, l)) - case (BC_PERIODIC) - do j = 1, buff_size - jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, j - 1, l) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, n - (j - 1), l) - end do - case default - do j = 1, buff_size - jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, n, l) - end do - end select - end do + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = 0, p + do k = idwbuff(1)%beg, idwbuff(1)%end + select case (bc_type(2, 1)%sf(k, 0, l)) + case (BC_PERIODIC) + do j = 1, buff_size + jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, j - 1, l) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, n - (j - 1), l) + end do + case default + do j = 1, buff_size + jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, n, l) + end do + end select end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (p == 0) then @@ -1589,51 +1589,51 @@ contains else if (bc_z%beg >= 0) then call s_mpi_sendrecv_variables_buffers(jac_sf, 3, -1, 1) else - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = idwbuff(2)%beg, idwbuff(2)%end - do k = idwbuff(1)%beg, idwbuff(1)%end - select case (bc_type(3, -1)%sf(k, l, 0)) - case (BC_PERIODIC) - do j = 1, buff_size - jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, p - j + 1) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, j - 1) - end do - case default - do j = 1, buff_size - jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, 0) - end do - end select - end do + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = idwbuff(2)%beg, idwbuff(2)%end + do k = idwbuff(1)%beg, idwbuff(1)%end + select case (bc_type(3, -1)%sf(k, l, 0)) + case (BC_PERIODIC) + do j = 1, buff_size + jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, p - j + 1) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, j - 1) + end do + case default + do j = 1, buff_size + jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, 0) + end do + end select end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (bc_z%end >= 0) then call s_mpi_sendrecv_variables_buffers(jac_sf, 3, 1, 1) else - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = idwbuff(2)%beg, idwbuff(2)%end - do k = idwbuff(1)%beg, idwbuff(1)%end - select case (bc_type(3, 1)%sf(k, l, 0)) - case (BC_PERIODIC) - do j = 1, buff_size - jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, j - 1) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, p - (j - 1)) - end do - case default - do j = 1, buff_size - jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, p) - end do - end select - end do + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = idwbuff(2)%beg, idwbuff(2)%end + do k = idwbuff(1)%beg, idwbuff(1)%end + select case (bc_type(3, 1)%sf(k, l, 0)) + case (BC_PERIODIC) + do j = 1, buff_size + jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, j - 1) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, p - (j - 1)) + end do + case default + do j = 1, buff_size + jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, p) + end do + end select end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_populate_F_igr_buffers diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index 4ba51e9564..afc5696a2c 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -129,37 +129,37 @@ contains real(wp), dimension(num_species) :: Ys real(wp), dimension(num_species) :: omega - #:call GPU_PARALLEL_LOOP(collapse=3, private='[Ys, omega, T]') - do z = bounds(3)%beg, bounds(3)%end - do y = bounds(2)%beg, bounds(2)%end - do x = bounds(1)%beg, bounds(1)%end - - $:GPU_LOOP(parallelism='[seq]') - do eqn = chemxb, chemxe - Ys(eqn - chemxb + 1) = q_prim_qp(eqn)%sf(x, y, z) - end do + $:GPU_PARALLEL_LOOP(collapse=3, private='[x,y,z,eqn,Ys, omega, T]') + do z = bounds(3)%beg, bounds(3)%end + do y = bounds(2)%beg, bounds(2)%end + do x = bounds(1)%beg, bounds(1)%end - rho = q_cons_qp(contxe)%sf(x, y, z) - T = q_T_sf%sf(x, y, z) + $:GPU_LOOP(parallelism='[seq]') + do eqn = chemxb, chemxe + Ys(eqn - chemxb + 1) = q_prim_qp(eqn)%sf(x, y, z) + end do - call get_net_production_rates(rho, T, Ys, omega) + rho = q_cons_qp(contxe)%sf(x, y, z) + T = q_T_sf%sf(x, y, z) - $:GPU_LOOP(parallelism='[seq]') - do eqn = chemxb, chemxe - #:block UNDEF_AMD - omega_m = molecular_weights(eqn - chemxb + 1)*omega(eqn - chemxb + 1) - #:endblock UNDEF_AMD - #:block DEF_AMD - omega_m = molecular_weights_nonparameter(eqn - chemxb + 1)*omega(eqn - chemxb + 1) - #:endblock DEF_AMD - rhs_vf(eqn)%sf(x, y, z) = rhs_vf(eqn)%sf(x, y, z) + omega_m + call get_net_production_rates(rho, T, Ys, omega) - end do + $:GPU_LOOP(parallelism='[seq]') + do eqn = chemxb, chemxe + #:block UNDEF_AMD + omega_m = molecular_weights(eqn - chemxb + 1)*omega(eqn - chemxb + 1) + #:endblock UNDEF_AMD + #:block DEF_AMD + omega_m = molecular_weights_nonparameter(eqn - chemxb + 1)*omega(eqn - chemxb + 1) + #:endblock DEF_AMD + rhs_vf(eqn)%sf(x, y, z) = rhs_vf(eqn)%sf(x, y, z) + omega_m end do + end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_compute_chemistry_reaction_flux @@ -191,114 +191,114 @@ contains offsets = 0 offsets(idir) = 1 - #:call GPU_PARALLEL_LOOP(collapse=3, private='[Ys_L, Ys_R, Ys_cell, Xs_L, Xs_R, mass_diffusivities_mixavg1, mass_diffusivities_mixavg2, mass_diffusivities_mixavg_Cell, h_l, h_r, Xs_cell, h_k, dXk_dxi,Mass_Diffu_Flux]', copyin='[offsets]') - do z = isc3%beg, isc3%end - do y = isc2%beg, isc2%end - do x = isc1%beg, isc1%end - ! Calculate grid spacing using direction-based indexing - select case (idir) - case (1) - grid_spacing = x_cc(x + 1) - x_cc(x) - case (2) - grid_spacing = y_cc(y + 1) - y_cc(y) - case (3) - grid_spacing = z_cc(z + 1) - z_cc(z) - end select - - ! Extract species mass fractions - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Ys_L(i - chemxb + 1) = q_prim_qp(i)%sf(x, y, z) - Ys_R(i - chemxb + 1) = q_prim_qp(i)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) - Ys_cell(i - chemxb + 1) = 0.5_wp*(Ys_L(i - chemxb + 1) + Ys_R(i - chemxb + 1)) - end do - - ! Calculate molecular weights and mole fractions - call get_mixture_molecular_weight(Ys_L, MW_L) - call get_mixture_molecular_weight(Ys_R, MW_R) - MW_cell = 0.5_wp*(MW_L + MW_R) - - call get_mole_fractions(MW_L, Ys_L, Xs_L) - call get_mole_fractions(MW_R, Ys_R, Xs_R) - - ! Calculate gas constants and thermodynamic properties - Rgas_L = gas_constant/MW_L - Rgas_R = gas_constant/MW_R - - P_L = q_prim_qp(E_idx)%sf(x, y, z) - P_R = q_prim_qp(E_idx)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) - - rho_L = q_prim_qp(1)%sf(x, y, z) - rho_R = q_prim_qp(1)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) - - T_L = P_L/rho_L/Rgas_L - T_R = P_R/rho_R/Rgas_R - - rho_cell = 0.5_wp*(rho_L + rho_R) - dT_dxi = (T_R - T_L)/grid_spacing - - ! Get transport properties - call get_species_mass_diffusivities_mixavg(P_L, T_L, Ys_L, mass_diffusivities_mixavg1) - call get_species_mass_diffusivities_mixavg(P_R, T_R, Ys_R, mass_diffusivities_mixavg2) - - call get_mixture_thermal_conductivity_mixavg(T_L, Ys_L, lambda_L) - call get_mixture_thermal_conductivity_mixavg(T_R, Ys_R, lambda_R) - - call get_species_enthalpies_rt(T_L, h_l) - call get_species_enthalpies_rt(T_R, h_r) - - ! Calculate species properties and gradients - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - h_l(i - chemxb + 1) = h_l(i - chemxb + 1)*gas_constant*T_L/molecular_weights(i - chemxb + 1) - h_r(i - chemxb + 1) = h_r(i - chemxb + 1)*gas_constant*T_R/molecular_weights(i - chemxb + 1) - Xs_cell(i - chemxb + 1) = 0.5_wp*(Xs_L(i - chemxb + 1) + Xs_R(i - chemxb + 1)) - h_k(i - chemxb + 1) = 0.5_wp*(h_l(i - chemxb + 1) + h_r(i - chemxb + 1)) - dXk_dxi(i - chemxb + 1) = (Xs_R(i - chemxb + 1) - Xs_L(i - chemxb + 1))/grid_spacing - end do - - ! Calculate mixture-averaged diffusivities - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - mass_diffusivities_mixavg_Cell(i - chemxb + 1) = & - (mass_diffusivities_mixavg2(i - chemxb + 1) + mass_diffusivities_mixavg1(i - chemxb + 1))/2.0_wp - end do - - lambda_Cell = 0.5_wp*(lambda_R + lambda_L) - - ! Calculate mass diffusion fluxes - rho_Vic = 0.0_wp - Mass_Diffu_Energy = 0.0_wp - - $:GPU_LOOP(parallelism='[seq]') - do eqn = chemxb, chemxe - Mass_Diffu_Flux(eqn - chemxb + 1) = rho_cell*mass_diffusivities_mixavg_Cell(eqn - chemxb + 1)* & - molecular_weights(eqn - chemxb + 1)/MW_cell*dXk_dxi(eqn - chemxb + 1) - rho_Vic = rho_Vic + Mass_Diffu_Flux(eqn - chemxb + 1) - Mass_Diffu_Energy = Mass_Diffu_Energy + h_k(eqn - chemxb + 1)*Mass_Diffu_Flux(eqn - chemxb + 1) - end do - - ! Apply corrections for mass conservation - $:GPU_LOOP(parallelism='[seq]') - do eqn = chemxb, chemxe - Mass_Diffu_Energy = Mass_Diffu_Energy - h_k(eqn - chemxb + 1)*Ys_cell(eqn - chemxb + 1)*rho_Vic - Mass_Diffu_Flux(eqn - chemxb + 1) = Mass_Diffu_Flux(eqn - chemxb + 1) - rho_Vic*Ys_cell(eqn - chemxb + 1) - end do - - ! Add thermal conduction contribution - Mass_Diffu_Energy = lambda_Cell*dT_dxi + Mass_Diffu_Energy - - ! Update flux arrays - flux_src_vf(E_idx)%sf(x, y, z) = flux_src_vf(E_idx)%sf(x, y, z) - Mass_Diffu_Energy - - $:GPU_LOOP(parallelism='[seq]') - do eqn = chemxb, chemxe - flux_src_vf(eqn)%sf(x, y, z) = flux_src_vf(eqn)%sf(x, y, z) - Mass_diffu_Flux(eqn - chemxb + 1) - end do + $:GPU_PARALLEL_LOOP(collapse=3, private='[x,y,z,i,eqn,Ys_L, Ys_R, Ys_cell, Xs_L, Xs_R, mass_diffusivities_mixavg1, mass_diffusivities_mixavg2, mass_diffusivities_mixavg_Cell, h_l, h_r, Xs_cell, h_k, dXk_dxi,Mass_Diffu_Flux]', copyin='[offsets]') + do z = isc3%beg, isc3%end + do y = isc2%beg, isc2%end + do x = isc1%beg, isc1%end + ! Calculate grid spacing using direction-based indexing + select case (idir) + case (1) + grid_spacing = x_cc(x + 1) - x_cc(x) + case (2) + grid_spacing = y_cc(y + 1) - y_cc(y) + case (3) + grid_spacing = z_cc(z + 1) - z_cc(z) + end select + + ! Extract species mass fractions + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Ys_L(i - chemxb + 1) = q_prim_qp(i)%sf(x, y, z) + Ys_R(i - chemxb + 1) = q_prim_qp(i)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) + Ys_cell(i - chemxb + 1) = 0.5_wp*(Ys_L(i - chemxb + 1) + Ys_R(i - chemxb + 1)) + end do + + ! Calculate molecular weights and mole fractions + call get_mixture_molecular_weight(Ys_L, MW_L) + call get_mixture_molecular_weight(Ys_R, MW_R) + MW_cell = 0.5_wp*(MW_L + MW_R) + + call get_mole_fractions(MW_L, Ys_L, Xs_L) + call get_mole_fractions(MW_R, Ys_R, Xs_R) + + ! Calculate gas constants and thermodynamic properties + Rgas_L = gas_constant/MW_L + Rgas_R = gas_constant/MW_R + + P_L = q_prim_qp(E_idx)%sf(x, y, z) + P_R = q_prim_qp(E_idx)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) + + rho_L = q_prim_qp(1)%sf(x, y, z) + rho_R = q_prim_qp(1)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) + + T_L = P_L/rho_L/Rgas_L + T_R = P_R/rho_R/Rgas_R + + rho_cell = 0.5_wp*(rho_L + rho_R) + dT_dxi = (T_R - T_L)/grid_spacing + + ! Get transport properties + call get_species_mass_diffusivities_mixavg(P_L, T_L, Ys_L, mass_diffusivities_mixavg1) + call get_species_mass_diffusivities_mixavg(P_R, T_R, Ys_R, mass_diffusivities_mixavg2) + + call get_mixture_thermal_conductivity_mixavg(T_L, Ys_L, lambda_L) + call get_mixture_thermal_conductivity_mixavg(T_R, Ys_R, lambda_R) + + call get_species_enthalpies_rt(T_L, h_l) + call get_species_enthalpies_rt(T_R, h_r) + + ! Calculate species properties and gradients + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + h_l(i - chemxb + 1) = h_l(i - chemxb + 1)*gas_constant*T_L/molecular_weights(i - chemxb + 1) + h_r(i - chemxb + 1) = h_r(i - chemxb + 1)*gas_constant*T_R/molecular_weights(i - chemxb + 1) + Xs_cell(i - chemxb + 1) = 0.5_wp*(Xs_L(i - chemxb + 1) + Xs_R(i - chemxb + 1)) + h_k(i - chemxb + 1) = 0.5_wp*(h_l(i - chemxb + 1) + h_r(i - chemxb + 1)) + dXk_dxi(i - chemxb + 1) = (Xs_R(i - chemxb + 1) - Xs_L(i - chemxb + 1))/grid_spacing + end do + + ! Calculate mixture-averaged diffusivities + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + mass_diffusivities_mixavg_Cell(i - chemxb + 1) = & + (mass_diffusivities_mixavg2(i - chemxb + 1) + mass_diffusivities_mixavg1(i - chemxb + 1))/2.0_wp + end do + + lambda_Cell = 0.5_wp*(lambda_R + lambda_L) + + ! Calculate mass diffusion fluxes + rho_Vic = 0.0_wp + Mass_Diffu_Energy = 0.0_wp + + $:GPU_LOOP(parallelism='[seq]') + do eqn = chemxb, chemxe + Mass_Diffu_Flux(eqn - chemxb + 1) = rho_cell*mass_diffusivities_mixavg_Cell(eqn - chemxb + 1)* & + molecular_weights(eqn - chemxb + 1)/MW_cell*dXk_dxi(eqn - chemxb + 1) + rho_Vic = rho_Vic + Mass_Diffu_Flux(eqn - chemxb + 1) + Mass_Diffu_Energy = Mass_Diffu_Energy + h_k(eqn - chemxb + 1)*Mass_Diffu_Flux(eqn - chemxb + 1) + end do + + ! Apply corrections for mass conservation + $:GPU_LOOP(parallelism='[seq]') + do eqn = chemxb, chemxe + Mass_Diffu_Energy = Mass_Diffu_Energy - h_k(eqn - chemxb + 1)*Ys_cell(eqn - chemxb + 1)*rho_Vic + Mass_Diffu_Flux(eqn - chemxb + 1) = Mass_Diffu_Flux(eqn - chemxb + 1) - rho_Vic*Ys_cell(eqn - chemxb + 1) + end do + + ! Add thermal conduction contribution + Mass_Diffu_Energy = lambda_Cell*dT_dxi + Mass_Diffu_Energy + + ! Update flux arrays + flux_src_vf(E_idx)%sf(x, y, z) = flux_src_vf(E_idx)%sf(x, y, z) - Mass_Diffu_Energy + + $:GPU_LOOP(parallelism='[seq]') + do eqn = chemxb, chemxe + flux_src_vf(eqn)%sf(x, y, z) = flux_src_vf(eqn)%sf(x, y, z) - Mass_diffu_Flux(eqn - chemxb + 1) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_compute_chemistry_diffusion_flux diff --git a/src/common/m_compute_levelset.fpp b/src/common/m_compute_levelset.fpp index dad7035583..f343c0c47c 100644 --- a/src/common/m_compute_levelset.fpp +++ b/src/common/m_compute_levelset.fpp @@ -43,26 +43,26 @@ contains center(1) = patch_ib(ib_patch_id)%x_centroid center(2) = patch_ib(ib_patch_id)%y_centroid - #:call GPU_PARALLEL_LOOP(private='[i,j,dist_vec,dist]', & + $:GPU_PARALLEL_LOOP(private='[i,j,dist_vec,dist]', & & copyin='[ib_patch_id,center,radius]', collapse=2) - do i = 0, m - do j = 0, n + do i = 0, m + do j = 0, n + + dist_vec(1) = x_cc(i) - center(1) + dist_vec(2) = y_cc(j) - center(2) + dist_vec(3) = 0._wp + dist = sqrt(sum(dist_vec**2)) + levelset%sf(i, j, 0, ib_patch_id) = dist - radius + if (f_approx_equal(dist, 0._wp)) then + levelset_norm%sf(i, j, 0, ib_patch_id, :) = 0 + else + levelset_norm%sf(i, j, 0, ib_patch_id, :) = & + dist_vec(:)/dist + end if - dist_vec(1) = x_cc(i) - center(1) - dist_vec(2) = y_cc(j) - center(2) - dist_vec(3) = 0._wp - dist = sqrt(sum(dist_vec**2)) - levelset%sf(i, j, 0, ib_patch_id) = dist - radius - if (f_approx_equal(dist, 0._wp)) then - levelset_norm%sf(i, j, 0, ib_patch_id, :) = 0 - else - levelset_norm%sf(i, j, 0, ib_patch_id, :) = & - dist_vec(:)/dist - end if - - end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_circle_levelset @@ -87,68 +87,68 @@ contains inverse_rotation(:, :) = patch_ib(ib_patch_id)%rotation_matrix_inverse(:, :) rotation(:, :) = patch_ib(ib_patch_id)%rotation_matrix(:, :) - #:call GPU_PARALLEL_LOOP(private='[i,j,xy_local,k,dist_vec,dist,global_dist,global_id]', & + $:GPU_PARALLEL_LOOP(private='[i,j,xy_local,k,dist_vec,dist,global_dist,global_id]', & & copyin='[ib_patch_id,center,rotation,inverse_rotation,airfoil_grid_u,airfoil_grid_l]', collapse=2) - do i = 0, m - do j = 0, n - xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] ! get coordinate frame centered on IB - xy_local = matmul(inverse_rotation, xy_local) ! rotate the frame into the IB's coordinate - - if (xy_local(2) >= 0._wp) then - ! finds the location on the airfoil grid with the minimum distance (closest) - do k = 1, Np - dist_vec(1) = xy_local(1) - airfoil_grid_u(k)%x - dist_vec(2) = xy_local(2) - airfoil_grid_u(k)%y - dist_vec(3) = 0._wp - dist = sqrt(sum(dist_vec**2)) - if (k == 1) then + do i = 0, m + do j = 0, n + xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] ! get coordinate frame centered on IB + xy_local = matmul(inverse_rotation, xy_local) ! rotate the frame into the IB's coordinate + + if (xy_local(2) >= 0._wp) then + ! finds the location on the airfoil grid with the minimum distance (closest) + do k = 1, Np + dist_vec(1) = xy_local(1) - airfoil_grid_u(k)%x + dist_vec(2) = xy_local(2) - airfoil_grid_u(k)%y + dist_vec(3) = 0._wp + dist = sqrt(sum(dist_vec**2)) + if (k == 1) then + global_dist = dist + global_id = k + else + if (dist < global_dist) then global_dist = dist global_id = k - else - if (dist < global_dist) then - global_dist = dist - global_id = k - end if end if - end do - dist_vec(1) = xy_local(1) - airfoil_grid_u(global_id)%x - dist_vec(2) = xy_local(2) - airfoil_grid_u(global_id)%y + end if + end do + dist_vec(1) = xy_local(1) - airfoil_grid_u(global_id)%x + dist_vec(2) = xy_local(2) - airfoil_grid_u(global_id)%y + dist_vec(3) = 0 + dist = global_dist + else + ! TODO :: This looks identical to the above code but using the lower array. Refactor this. + do k = 1, Np + dist_vec(1) = xy_local(1) - airfoil_grid_l(k)%x + dist_vec(2) = xy_local(2) - airfoil_grid_l(k)%y dist_vec(3) = 0 - dist = global_dist - else - ! TODO :: This looks identical to the above code but using the lower array. Refactor this. - do k = 1, Np - dist_vec(1) = xy_local(1) - airfoil_grid_l(k)%x - dist_vec(2) = xy_local(2) - airfoil_grid_l(k)%y - dist_vec(3) = 0 - dist = sqrt(sum(dist_vec**2)) - if (k == 1) then + dist = sqrt(sum(dist_vec**2)) + if (k == 1) then + global_dist = dist + global_id = k + else + if (dist < global_dist) then global_dist = dist global_id = k - else - if (dist < global_dist) then - global_dist = dist - global_id = k - end if end if - end do - dist_vec(1) = xy_local(1) - airfoil_grid_l(global_id)%x - dist_vec(2) = xy_local(2) - airfoil_grid_l(global_id)%y - dist_vec(3) = 0 - dist = global_dist - end if - - levelset%sf(i, j, 0, ib_patch_id) = dist - if (f_approx_equal(dist, 0._wp)) then - levelset_norm%sf(i, j, 0, ib_patch_id, :) = 0._wp - else - levelset_norm%sf(i, j, 0, ib_patch_id, :) = & - matmul(rotation, dist_vec(:))/dist ! convert the normal vector back to global grid coordinates - end if + end if + end do + dist_vec(1) = xy_local(1) - airfoil_grid_l(global_id)%x + dist_vec(2) = xy_local(2) - airfoil_grid_l(global_id)%y + dist_vec(3) = 0 + dist = global_dist + end if + + levelset%sf(i, j, 0, ib_patch_id) = dist + if (f_approx_equal(dist, 0._wp)) then + levelset_norm%sf(i, j, 0, ib_patch_id, :) = 0._wp + else + levelset_norm%sf(i, j, 0, ib_patch_id, :) = & + matmul(rotation, dist_vec(:))/dist ! convert the normal vector back to global grid coordinates + end if - end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_airfoil_levelset @@ -180,82 +180,82 @@ contains z_max = center(3) + lz/2 z_min = center(3) - lz/2 - #:call GPU_PARALLEL_LOOP(private='[i,j,l,xyz_local,k,dist_vec,dist,global_dist,global_id,dist_side,dist_surf]', & + $:GPU_PARALLEL_LOOP(private='[i,j,l,xyz_local,k,dist_vec,dist,global_dist,global_id,dist_side,dist_surf]', & & copyin='[ib_patch_id,center,rotation,inverse_rotation,airfoil_grid_u,airfoil_grid_l,z_min,z_max]', collapse=3) - do l = 0, p - do j = 0, n - do i = 0, m - - xyz_local = [x_cc(i) - center(1), y_cc(j) - center(2), z_cc(l) - center(3)] ! get coordinate frame centered on IB - xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates - - if (xyz_local(2) >= center(2)) then - do k = 1, Np - dist_vec(1) = xyz_local(1) - airfoil_grid_u(k)%x - dist_vec(2) = xyz_local(2) - airfoil_grid_u(k)%y - dist_vec(3) = 0 - dist_surf = sqrt(sum(dist_vec**2)) - if (k == 1) then + do l = 0, p + do j = 0, n + do i = 0, m + + xyz_local = [x_cc(i) - center(1), y_cc(j) - center(2), z_cc(l) - center(3)] ! get coordinate frame centered on IB + xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates + + if (xyz_local(2) >= center(2)) then + do k = 1, Np + dist_vec(1) = xyz_local(1) - airfoil_grid_u(k)%x + dist_vec(2) = xyz_local(2) - airfoil_grid_u(k)%y + dist_vec(3) = 0 + dist_surf = sqrt(sum(dist_vec**2)) + if (k == 1) then + global_dist = dist_surf + global_id = k + else + if (dist_surf < global_dist) then global_dist = dist_surf global_id = k - else - if (dist_surf < global_dist) then - global_dist = dist_surf - global_id = k - end if end if - end do - dist_vec(1) = xyz_local(1) - airfoil_grid_u(global_id)%x - dist_vec(2) = xyz_local(2) - airfoil_grid_u(global_id)%y + end if + end do + dist_vec(1) = xyz_local(1) - airfoil_grid_u(global_id)%x + dist_vec(2) = xyz_local(2) - airfoil_grid_u(global_id)%y + dist_vec(3) = 0 + dist_surf = global_dist + else + do k = 1, Np + dist_vec(1) = xyz_local(1) - airfoil_grid_l(k)%x + dist_vec(2) = xyz_local(2) - airfoil_grid_l(k)%y dist_vec(3) = 0 - dist_surf = global_dist - else - do k = 1, Np - dist_vec(1) = xyz_local(1) - airfoil_grid_l(k)%x - dist_vec(2) = xyz_local(2) - airfoil_grid_l(k)%y - dist_vec(3) = 0 - dist_surf = sqrt(sum(dist_vec**2)) - if (k == 1) then + dist_surf = sqrt(sum(dist_vec**2)) + if (k == 1) then + global_dist = dist_surf + global_id = k + else + if (dist_surf < global_dist) then global_dist = dist_surf global_id = k - else - if (dist_surf < global_dist) then - global_dist = dist_surf - global_id = k - end if end if - end do - dist_vec(1) = xyz_local(1) - airfoil_grid_l(global_id)%x - dist_vec(2) = xyz_local(2) - airfoil_grid_l(global_id)%y - dist_vec(3) = 0 - dist_surf = global_dist - end if + end if + end do + dist_vec(1) = xyz_local(1) - airfoil_grid_l(global_id)%x + dist_vec(2) = xyz_local(2) - airfoil_grid_l(global_id)%y + dist_vec(3) = 0 + dist_surf = global_dist + end if - dist_side = min(abs(z_cc(l) - z_min), abs(z_max - z_cc(l))) + dist_side = min(abs(z_cc(l) - z_min), abs(z_max - z_cc(l))) - if (dist_side < dist_surf) then - levelset%sf(i, j, l, ib_patch_id) = dist_side - if (f_approx_equal(dist_side, abs(z_cc(l) - z_min))) then - levelset_norm%sf(i, j, l, ib_patch_id, :) = (/0, 0, -1/) - else - levelset_norm%sf(i, j, l, ib_patch_id, :) = (/0, 0, 1/) - end if - levelset_norm%sf(i, j, l, ib_patch_id, :) = & - matmul(rotation, levelset_norm%sf(i, j, l, ib_patch_id, :)/dist_surf) + if (dist_side < dist_surf) then + levelset%sf(i, j, l, ib_patch_id) = dist_side + if (f_approx_equal(dist_side, abs(z_cc(l) - z_min))) then + levelset_norm%sf(i, j, l, ib_patch_id, :) = (/0, 0, -1/) else - levelset%sf(i, j, l, ib_patch_id) = dist_surf - if (f_approx_equal(dist_surf, 0._wp)) then - levelset_norm%sf(i, j, l, ib_patch_id, :) = 0 - else - levelset_norm%sf(i, j, l, ib_patch_id, :) = & - matmul(rotation, dist_vec(:)/dist_surf) - end if + levelset_norm%sf(i, j, l, ib_patch_id, :) = (/0, 0, 1/) end if + levelset_norm%sf(i, j, l, ib_patch_id, :) = & + matmul(rotation, levelset_norm%sf(i, j, l, ib_patch_id, :)/dist_surf) + else + levelset%sf(i, j, l, ib_patch_id) = dist_surf + if (f_approx_equal(dist_surf, 0._wp)) then + levelset_norm%sf(i, j, l, ib_patch_id, :) = 0 + else + levelset_norm%sf(i, j, l, ib_patch_id, :) = & + matmul(rotation, dist_vec(:)/dist_surf) + end if + end if - end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_3D_airfoil_levelset @@ -290,49 +290,49 @@ contains bottom_left(1) = -length_x/2 bottom_left(2) = -length_y/2 - #:call GPU_PARALLEL_LOOP(private='[i,j,k,min_dist,idx,side_dists,xy_local,dist_vec]', & + $:GPU_PARALLEL_LOOP(private='[i,j,k,min_dist,idx,side_dists,xy_local,dist_vec]', & & copyin='[ib_patch_id,center,bottom_left,top_right,inverse_rotation,rotation]', collapse=2) - do i = 0, m - do j = 0, n - xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] - xy_local = matmul(inverse_rotation, xy_local) - - if ((xy_local(1) > bottom_left(1) .and. xy_local(1) < top_right(1)) .or. & - (xy_local(2) > bottom_left(2) .and. xy_local(2) < top_right(2))) then - - side_dists(1) = bottom_left(1) - xy_local(1) - side_dists(2) = top_right(1) - xy_local(1) - side_dists(3) = bottom_left(2) - xy_local(2) - side_dists(4) = top_right(2) - xy_local(2) - min_dist = side_dists(1) - idx = 1 - - do k = 2, 4 - if (abs(side_dists(k)) < abs(min_dist)) then - idx = k - min_dist = side_dists(idx) - end if - end do + do i = 0, m + do j = 0, n + xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] + xy_local = matmul(inverse_rotation, xy_local) + + if ((xy_local(1) > bottom_left(1) .and. xy_local(1) < top_right(1)) .or. & + (xy_local(2) > bottom_left(2) .and. xy_local(2) < top_right(2))) then + + side_dists(1) = bottom_left(1) - xy_local(1) + side_dists(2) = top_right(1) - xy_local(1) + side_dists(3) = bottom_left(2) - xy_local(2) + side_dists(4) = top_right(2) - xy_local(2) + min_dist = side_dists(1) + idx = 1 + + do k = 2, 4 + if (abs(side_dists(k)) < abs(min_dist)) then + idx = k + min_dist = side_dists(idx) + end if + end do - levelset%sf(i, j, 0, ib_patch_id) = side_dists(idx) - dist_vec = 0._wp - if (.not. f_approx_equal(side_dists(idx), 0._wp)) then - if (idx == 1 .or. idx == 2) then - ! vector points along the x axis - dist_vec(1) = side_dists(idx)/abs(side_dists(idx)) - else - ! vector points along the y axis - dist_vec(2) = side_dists(idx)/abs(side_dists(idx)) - end if - ! convert the normal vector back into the global coordinate system - levelset_norm%sf(i, j, 0, ib_patch_id, :) = matmul(rotation, dist_vec) + levelset%sf(i, j, 0, ib_patch_id) = side_dists(idx) + dist_vec = 0._wp + if (.not. f_approx_equal(side_dists(idx), 0._wp)) then + if (idx == 1 .or. idx == 2) then + ! vector points along the x axis + dist_vec(1) = side_dists(idx)/abs(side_dists(idx)) else - levelset_norm%sf(i, j, 0, ib_patch_id, :) = 0._wp + ! vector points along the y axis + dist_vec(2) = side_dists(idx)/abs(side_dists(idx)) end if + ! convert the normal vector back into the global coordinate system + levelset_norm%sf(i, j, 0, ib_patch_id, :) = matmul(rotation, dist_vec) + else + levelset_norm%sf(i, j, 0, ib_patch_id, :) = 0._wp end if - end do + end if end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_rectangle_levelset @@ -371,77 +371,77 @@ contains Front = length_z/2 Back = -length_z/2 - #:call GPU_PARALLEL_LOOP(private='[i,j,k,min_dist,side_dists,xyz_local,dist_vec]', & + $:GPU_PARALLEL_LOOP(private='[i,j,k,min_dist,side_dists,xyz_local,dist_vec]', & & copyin='[ib_patch_id,center,inverse_rotation,rotation,Right,Left,Top,Bottom,Front,Back]', collapse=3) - do i = 0, m - do j = 0, n - do k = 0, p - - xyz_local = [x_cc(i), y_cc(j), z_cc(k)] - center ! get coordinate frame centered on IB - xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinate - - if ((xyz_local(1) > Left .and. xyz_local(1) < Right) .or. & - (xyz_local(2) > Bottom .and. xyz_local(2) < Top) .or. & - (xyz_local(3) > Back .and. xyz_local(3) < Front)) then - - side_dists(1) = Left - xyz_local(1) - side_dists(2) = xyz_local(1) - Right - side_dists(3) = Bottom - xyz_local(2) - side_dists(4) = xyz_local(2) - Top - side_dists(5) = Back - xyz_local(3) - side_dists(6) = xyz_local(3) - Front - min_dist = minval(abs(side_dists)) - - ! TODO :: The way that this is written, it looks like we will - ! trigger at the first size that is close to the minimum distance, - ! meaning corners where side_dists are the same will - ! trigger on what may not actually be the minimum, - ! leading to undesired behavior. This should be resolved - ! and this code should be cleaned up. It also means that - ! rotating the box 90 degrees will cause tests to fail. - dist_vec = 0._wp - if (f_approx_equal(min_dist, abs(side_dists(1)))) then - levelset%sf(i, j, k, ib_patch_id) = side_dists(1) - if (.not. f_approx_equal(side_dists(1), 0._wp)) then - dist_vec(1) = side_dists(1)/abs(side_dists(1)) - end if + do i = 0, m + do j = 0, n + do k = 0, p + + xyz_local = [x_cc(i), y_cc(j), z_cc(k)] - center ! get coordinate frame centered on IB + xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinate + + if ((xyz_local(1) > Left .and. xyz_local(1) < Right) .or. & + (xyz_local(2) > Bottom .and. xyz_local(2) < Top) .or. & + (xyz_local(3) > Back .and. xyz_local(3) < Front)) then + + side_dists(1) = Left - xyz_local(1) + side_dists(2) = xyz_local(1) - Right + side_dists(3) = Bottom - xyz_local(2) + side_dists(4) = xyz_local(2) - Top + side_dists(5) = Back - xyz_local(3) + side_dists(6) = xyz_local(3) - Front + min_dist = minval(abs(side_dists)) + + ! TODO :: The way that this is written, it looks like we will + ! trigger at the first size that is close to the minimum distance, + ! meaning corners where side_dists are the same will + ! trigger on what may not actually be the minimum, + ! leading to undesired behavior. This should be resolved + ! and this code should be cleaned up. It also means that + ! rotating the box 90 degrees will cause tests to fail. + dist_vec = 0._wp + if (f_approx_equal(min_dist, abs(side_dists(1)))) then + levelset%sf(i, j, k, ib_patch_id) = side_dists(1) + if (.not. f_approx_equal(side_dists(1), 0._wp)) then + dist_vec(1) = side_dists(1)/abs(side_dists(1)) + end if - else if (f_approx_equal(min_dist, abs(side_dists(2)))) then - levelset%sf(i, j, k, ib_patch_id) = side_dists(2) - if (.not. f_approx_equal(side_dists(2), 0._wp)) then - dist_vec(1) = -side_dists(2)/abs(side_dists(2)) - end if + else if (f_approx_equal(min_dist, abs(side_dists(2)))) then + levelset%sf(i, j, k, ib_patch_id) = side_dists(2) + if (.not. f_approx_equal(side_dists(2), 0._wp)) then + dist_vec(1) = -side_dists(2)/abs(side_dists(2)) + end if - else if (f_approx_equal(min_dist, abs(side_dists(3)))) then - levelset%sf(i, j, k, ib_patch_id) = side_dists(3) - if (.not. f_approx_equal(side_dists(3), 0._wp)) then - dist_vec(2) = side_dists(3)/abs(side_dists(3)) - end if + else if (f_approx_equal(min_dist, abs(side_dists(3)))) then + levelset%sf(i, j, k, ib_patch_id) = side_dists(3) + if (.not. f_approx_equal(side_dists(3), 0._wp)) then + dist_vec(2) = side_dists(3)/abs(side_dists(3)) + end if - else if (f_approx_equal(min_dist, abs(side_dists(4)))) then - levelset%sf(i, j, k, ib_patch_id) = side_dists(4) - if (.not. f_approx_equal(side_dists(4), 0._wp)) then - dist_vec(2) = -side_dists(4)/abs(side_dists(4)) - end if + else if (f_approx_equal(min_dist, abs(side_dists(4)))) then + levelset%sf(i, j, k, ib_patch_id) = side_dists(4) + if (.not. f_approx_equal(side_dists(4), 0._wp)) then + dist_vec(2) = -side_dists(4)/abs(side_dists(4)) + end if - else if (f_approx_equal(min_dist, abs(side_dists(5)))) then - levelset%sf(i, j, k, ib_patch_id) = side_dists(5) - if (.not. f_approx_equal(side_dists(5), 0._wp)) then - dist_vec(3) = side_dists(5)/abs(side_dists(5)) - end if + else if (f_approx_equal(min_dist, abs(side_dists(5)))) then + levelset%sf(i, j, k, ib_patch_id) = side_dists(5) + if (.not. f_approx_equal(side_dists(5), 0._wp)) then + dist_vec(3) = side_dists(5)/abs(side_dists(5)) + end if - else if (f_approx_equal(min_dist, abs(side_dists(6)))) then - levelset%sf(i, j, k, ib_patch_id) = side_dists(6) - if (.not. f_approx_equal(side_dists(6), 0._wp)) then - dist_vec(3) = -side_dists(6)/abs(side_dists(6)) - end if + else if (f_approx_equal(min_dist, abs(side_dists(6)))) then + levelset%sf(i, j, k, ib_patch_id) = side_dists(6) + if (.not. f_approx_equal(side_dists(6), 0._wp)) then + dist_vec(3) = -side_dists(6)/abs(side_dists(6)) end if - levelset_norm%sf(i, j, k, ib_patch_id, :) = matmul(rotation, dist_vec) end if - end do + levelset_norm%sf(i, j, k, ib_patch_id, :) = matmul(rotation, dist_vec) + end if end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_cuboid_levelset @@ -461,25 +461,25 @@ contains center(2) = patch_ib(ib_patch_id)%y_centroid center(3) = patch_ib(ib_patch_id)%z_centroid - #:call GPU_PARALLEL_LOOP(private='[i,j,k,dist_vec,dist]', & + $:GPU_PARALLEL_LOOP(private='[i,j,k,dist_vec,dist]', & & copyin='[ib_patch_id,center,radius]', collapse=3) - do i = 0, m - do j = 0, n - do k = 0, p - dist_vec(1) = x_cc(i) - center(1) - dist_vec(2) = y_cc(j) - center(2) - dist_vec(3) = z_cc(k) - center(3) - dist = sqrt(sum(dist_vec**2)) - levelset%sf(i, j, k, ib_patch_id) = dist - radius - if (f_approx_equal(dist, 0._wp)) then - levelset_norm%sf(i, j, k, ib_patch_id, :) = (/1, 0, 0/) - else - levelset_norm%sf(i, j, k, ib_patch_id, :) = dist_vec(:)/dist - end if - end do + do i = 0, m + do j = 0, n + do k = 0, p + dist_vec(1) = x_cc(i) - center(1) + dist_vec(2) = y_cc(j) - center(2) + dist_vec(3) = z_cc(k) - center(3) + dist = sqrt(sum(dist_vec**2)) + levelset%sf(i, j, k, ib_patch_id) = dist - radius + if (f_approx_equal(dist, 0._wp)) then + levelset_norm%sf(i, j, k, ib_patch_id, :) = (/1, 0, 0/) + else + levelset_norm%sf(i, j, k, ib_patch_id, :) = dist_vec(:)/dist + end if end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_sphere_levelset @@ -526,41 +526,41 @@ contains dist_surface_vec = (/1, 1, 0/) end if - #:call GPU_PARALLEL_LOOP(private='[i,j,k,side_pos,dist_side,dist_surface,xyz_local]', & + $:GPU_PARALLEL_LOOP(private='[i,j,k,side_pos,dist_side,dist_surface,xyz_local]', & & copyin='[ib_patch_id,center,radius,inverse_rotation,rotation,dist_sides_vec,dist_surface_vec]', collapse=3) - do i = 0, m - do j = 0, n - do k = 0, p - xyz_local = [x_cc(i), y_cc(j), z_cc(k)] - center ! get coordinate frame centered on IB - xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates - - ! get distance to flat edge of cylinder - side_pos = dot_product(xyz_local, dist_sides_vec) - dist_side = min(abs(side_pos - boundary(1)), & - abs(boundary(2) - side_pos)) - ! get distance to curved side of cylinder - dist_surface = norm2(xyz_local*dist_surface_vec) & - - radius - - if (dist_side < abs(dist_surface)) then - ! if the closest edge is flat - levelset%sf(i, j, k, ib_patch_id) = -dist_side - if (f_approx_equal(dist_side, abs(side_pos - boundary(1)))) then - levelset_norm%sf(i, j, k, ib_patch_id, :) = matmul(rotation, -dist_sides_vec) - else - levelset_norm%sf(i, j, k, ib_patch_id, :) = matmul(rotation, dist_sides_vec) - end if + do i = 0, m + do j = 0, n + do k = 0, p + xyz_local = [x_cc(i), y_cc(j), z_cc(k)] - center ! get coordinate frame centered on IB + xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates + + ! get distance to flat edge of cylinder + side_pos = dot_product(xyz_local, dist_sides_vec) + dist_side = min(abs(side_pos - boundary(1)), & + abs(boundary(2) - side_pos)) + ! get distance to curved side of cylinder + dist_surface = norm2(xyz_local*dist_surface_vec) & + - radius + + if (dist_side < abs(dist_surface)) then + ! if the closest edge is flat + levelset%sf(i, j, k, ib_patch_id) = -dist_side + if (f_approx_equal(dist_side, abs(side_pos - boundary(1)))) then + levelset_norm%sf(i, j, k, ib_patch_id, :) = matmul(rotation, -dist_sides_vec) else - levelset%sf(i, j, k, ib_patch_id) = dist_surface - - xyz_local = xyz_local*dist_surface_vec - xyz_local = xyz_local/norm2(xyz_local) - levelset_norm%sf(i, j, k, ib_patch_id, :) = matmul(rotation, xyz_local) + levelset_norm%sf(i, j, k, ib_patch_id, :) = matmul(rotation, dist_sides_vec) end if - end do + else + levelset%sf(i, j, k, ib_patch_id) = dist_surface + + xyz_local = xyz_local*dist_surface_vec + xyz_local = xyz_local/norm2(xyz_local) + levelset_norm%sf(i, j, k, ib_patch_id, :) = matmul(rotation, xyz_local) + end if end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_cylinder_levelset diff --git a/src/common/m_finite_differences.fpp b/src/common/m_finite_differences.fpp index c01953e216..f85d46944b 100644 --- a/src/common/m_finite_differences.fpp +++ b/src/common/m_finite_differences.fpp @@ -18,45 +18,45 @@ contains real(wp) :: divergence - #:call GPU_PARALLEL_LOOP(collapse=3, private='[divergence]') - do x = ix_s%beg, ix_s%end - do y = iy_s%beg, iy_s%end - do z = iz_s%beg, iz_s%end - - if (x == ix_s%beg) then - divergence = (-3._wp*fields(1)%sf(x, y, z) + 4._wp*fields(1)%sf(x + 1, y, z) - fields(1)%sf(x + 2, y, z))/(x_cc(x + 2) - x_cc(x)) - else if (x == ix_s%end) then - divergence = (+3._wp*fields(1)%sf(x, y, z) - 4._wp*fields(1)%sf(x - 1, y, z) + fields(1)%sf(x - 2, y, z))/(x_cc(x) - x_cc(x - 2)) + $:GPU_PARALLEL_LOOP(collapse=3, private='[x,y,z,divergence]') + do x = ix_s%beg, ix_s%end + do y = iy_s%beg, iy_s%end + do z = iz_s%beg, iz_s%end + + if (x == ix_s%beg) then + divergence = (-3._wp*fields(1)%sf(x, y, z) + 4._wp*fields(1)%sf(x + 1, y, z) - fields(1)%sf(x + 2, y, z))/(x_cc(x + 2) - x_cc(x)) + else if (x == ix_s%end) then + divergence = (+3._wp*fields(1)%sf(x, y, z) - 4._wp*fields(1)%sf(x - 1, y, z) + fields(1)%sf(x - 2, y, z))/(x_cc(x) - x_cc(x - 2)) + else + divergence = (fields(1)%sf(x + 1, y, z) - fields(1)%sf(x - 1, y, z))/(x_cc(x + 1) - x_cc(x - 1)) + end if + + if (n > 0) then + if (y == iy_s%beg) then + divergence = divergence + (-3._wp*fields(2)%sf(x, y, z) + 4._wp*fields(2)%sf(x, y + 1, z) - fields(2)%sf(x, y + 2, z))/(y_cc(y + 2) - y_cc(y)) + else if (y == iy_s%end) then + divergence = divergence + (+3._wp*fields(2)%sf(x, y, z) - 4._wp*fields(2)%sf(x, y - 1, z) + fields(2)%sf(x, y - 2, z))/(y_cc(y) - y_cc(y - 2)) else - divergence = (fields(1)%sf(x + 1, y, z) - fields(1)%sf(x - 1, y, z))/(x_cc(x + 1) - x_cc(x - 1)) + divergence = divergence + (fields(2)%sf(x, y + 1, z) - fields(2)%sf(x, y - 1, z))/(y_cc(y + 1) - y_cc(y - 1)) end if + end if - if (n > 0) then - if (y == iy_s%beg) then - divergence = divergence + (-3._wp*fields(2)%sf(x, y, z) + 4._wp*fields(2)%sf(x, y + 1, z) - fields(2)%sf(x, y + 2, z))/(y_cc(y + 2) - y_cc(y)) - else if (y == iy_s%end) then - divergence = divergence + (+3._wp*fields(2)%sf(x, y, z) - 4._wp*fields(2)%sf(x, y - 1, z) + fields(2)%sf(x, y - 2, z))/(y_cc(y) - y_cc(y - 2)) - else - divergence = divergence + (fields(2)%sf(x, y + 1, z) - fields(2)%sf(x, y - 1, z))/(y_cc(y + 1) - y_cc(y - 1)) - end if - end if - - if (p > 0) then - if (z == iz_s%beg) then - divergence = divergence + (-3._wp*fields(3)%sf(x, y, z) + 4._wp*fields(3)%sf(x, y, z + 1) - fields(3)%sf(x, y, z + 2))/(z_cc(z + 2) - z_cc(z)) - else if (z == iz_s%end) then - divergence = divergence + (+3._wp*fields(3)%sf(x, y, z) - 4._wp*fields(3)%sf(x, y, z - 1) + fields(2)%sf(x, y, z - 2))/(z_cc(z) - z_cc(z - 2)) - else - divergence = divergence + (fields(3)%sf(x, y, z + 1) - fields(3)%sf(x, y, z - 1))/(z_cc(z + 1) - z_cc(z - 1)) - end if + if (p > 0) then + if (z == iz_s%beg) then + divergence = divergence + (-3._wp*fields(3)%sf(x, y, z) + 4._wp*fields(3)%sf(x, y, z + 1) - fields(3)%sf(x, y, z + 2))/(z_cc(z + 2) - z_cc(z)) + else if (z == iz_s%end) then + divergence = divergence + (+3._wp*fields(3)%sf(x, y, z) - 4._wp*fields(3)%sf(x, y, z - 1) + fields(2)%sf(x, y, z - 2))/(z_cc(z) - z_cc(z - 2)) + else + divergence = divergence + (fields(3)%sf(x, y, z + 1) - fields(3)%sf(x, y, z - 1))/(z_cc(z + 1) - z_cc(z - 1)) end if + end if - div%sf(x, y, z) = div%sf(x, y, z) + divergence + div%sf(x, y, z) = div%sf(x, y, z) + divergence - end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_compute_fd_divergence diff --git a/src/common/m_ib_patches.fpp b/src/common/m_ib_patches.fpp index c9f422b973..c0d95e6f12 100644 --- a/src/common/m_ib_patches.fpp +++ b/src/common/m_ib_patches.fpp @@ -162,18 +162,18 @@ contains ! that cell. If both queries check out, the primitive variables of ! the current patch are assigned to this cell. - #:call GPU_PARALLEL_LOOP(private='[i,j]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j]', copy='[ib_markers_sf]',& & copyin='[patch_id,center,radius]', collapse=2) - do j = 0, n - do i = 0, m - if ((x_cc(i) - center(1))**2 & - + (y_cc(j) - center(2))**2 <= radius**2) & - then - ib_markers_sf(i, j, 0) = patch_id - end if - end do + do j = 0, n + do i = 0, m + if ((x_cc(i) - center(1))**2 & + + (y_cc(j) - center(2))**2 <= radius**2) & + then + ib_markers_sf(i, j, 0) = patch_id + end if end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_ib_circle @@ -270,62 +270,62 @@ contains end if - #:call GPU_PARALLEL_LOOP(private='[i,j,xy_local,k,f]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j,xy_local,k,f]', copy='[ib_markers_sf]',& & copyin='[patch_id,center,inverse_rotation,ma,ca_in,airfoil_grid_u,airfoil_grid_l]', collapse=2) - do j = 0, n - do i = 0, m - xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] ! get coordinate frame centered on IB - xy_local = matmul(inverse_rotation, xy_local) ! rotate the frame into the IB's coordinates - - if (xy_local(1) >= 0._wp .and. xy_local(1) <= ca_in) then - xa = xy_local(1)/ca_in - if (xa <= pa) then - yc = (ma/pa**2)*(2*pa*xa - xa**2) - dycdxc = (2*ma/pa**2)*(pa - xa) + do j = 0, n + do i = 0, m + xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] ! get coordinate frame centered on IB + xy_local = matmul(inverse_rotation, xy_local) ! rotate the frame into the IB's coordinates + + if (xy_local(1) >= 0._wp .and. xy_local(1) <= ca_in) then + xa = xy_local(1)/ca_in + if (xa <= pa) then + yc = (ma/pa**2)*(2*pa*xa - xa**2) + dycdxc = (2*ma/pa**2)*(pa - xa) + else + yc = (ma/(1 - pa)**2)*(1 - 2*pa + 2*pa*xa - xa**2) + dycdxc = (2*ma/(1 - pa)**2)*(pa - xa) + end if + if (xy_local(2) >= 0._wp) then + k = 1 + do while (airfoil_grid_u(k)%x < xy_local(1) .and. k <= Np) + k = k + 1 + end do + if (f_approx_equal(airfoil_grid_u(k)%x, xy_local(1))) then + if (xy_local(2) <= airfoil_grid_u(k)%y) then + !!IB + ib_markers_sf(i, j, 0) = patch_id + end if else - yc = (ma/(1 - pa)**2)*(1 - 2*pa + 2*pa*xa - xa**2) - dycdxc = (2*ma/(1 - pa)**2)*(pa - xa) - end if - if (xy_local(2) >= 0._wp) then - k = 1 - do while (airfoil_grid_u(k)%x < xy_local(1) .and. k <= Np) - k = k + 1 - end do - if (f_approx_equal(airfoil_grid_u(k)%x, xy_local(1))) then - if (xy_local(2) <= airfoil_grid_u(k)%y) then + f = (airfoil_grid_u(k)%x - xy_local(1))/(airfoil_grid_u(k)%x - airfoil_grid_u(k - 1)%x) + if (xy_local(2) <= ((1._wp - f)*airfoil_grid_u(k)%y + f*airfoil_grid_u(k - 1)%y)) then !!IB - ib_markers_sf(i, j, 0) = patch_id - end if - else - f = (airfoil_grid_u(k)%x - xy_local(1))/(airfoil_grid_u(k)%x - airfoil_grid_u(k - 1)%x) - if (xy_local(2) <= ((1._wp - f)*airfoil_grid_u(k)%y + f*airfoil_grid_u(k - 1)%y)) then + ib_markers_sf(i, j, 0) = patch_id + end if + end if + else + k = 1 + do while (airfoil_grid_l(k)%x < xy_local(1)) + k = k + 1 + end do + if (f_approx_equal(airfoil_grid_l(k)%x, xy_local(1))) then + if (xy_local(2) >= airfoil_grid_l(k)%y) then !!IB - ib_markers_sf(i, j, 0) = patch_id - end if + ib_markers_sf(i, j, 0) = patch_id end if else - k = 1 - do while (airfoil_grid_l(k)%x < xy_local(1)) - k = k + 1 - end do - if (f_approx_equal(airfoil_grid_l(k)%x, xy_local(1))) then - if (xy_local(2) >= airfoil_grid_l(k)%y) then - !!IB - ib_markers_sf(i, j, 0) = patch_id - end if - else - f = (airfoil_grid_l(k)%x - xy_local(1))/(airfoil_grid_l(k)%x - airfoil_grid_l(k - 1)%x) + f = (airfoil_grid_l(k)%x - xy_local(1))/(airfoil_grid_l(k)%x - airfoil_grid_l(k - 1)%x) - if (xy_local(2) >= ((1._wp - f)*airfoil_grid_l(k)%y + f*airfoil_grid_l(k - 1)%y)) then + if (xy_local(2) >= ((1._wp - f)*airfoil_grid_l(k)%y + f*airfoil_grid_l(k - 1)%y)) then !!IB - ib_markers_sf(i, j, 0) = patch_id - end if + ib_markers_sf(i, j, 0) = patch_id end if end if end if - end do + end if end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_ib_airfoil @@ -423,59 +423,59 @@ contains airfoil_grid_l(Np)%y = 0._wp end if - #:call GPU_PARALLEL_LOOP(private='[i,j,l,xyz_local,k,f]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j,l,xyz_local,k,f]', copy='[ib_markers_sf]',& & copyin='[patch_id,center,inverse_rotation,ma,ca_in,airfoil_grid_u,airfoil_grid_l]', collapse=3) - do l = 0, p - do j = 0, n - do i = 0, m - xyz_local = [x_cc(i) - center(1), y_cc(j) - center(2), z_cc(l) - center(3)] ! get coordinate frame centered on IB - xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates - - if (xyz_local(3) >= z_min .and. xyz_local(3) <= z_max) then - - if (xyz_local(1) >= 0._wp .and. xyz_local(1) <= ca_in) then - if (xyz_local(2) >= 0._wp) then - k = 1 - do while (airfoil_grid_u(k)%x < xyz_local(1)) - k = k + 1 - end do - if (f_approx_equal(airfoil_grid_u(k)%x, xyz_local(1))) then - if (xyz_local(2) <= airfoil_grid_u(k)%y) then - !!IB - ib_markers_sf(i, j, l) = patch_id - end if - else - f = (airfoil_grid_u(k)%x - xyz_local(1))/(airfoil_grid_u(k)%x - airfoil_grid_u(k - 1)%x) - if (xyz_local(2) <= ((1._wp - f)*airfoil_grid_u(k)%y + f*airfoil_grid_u(k - 1)%y)) then + do l = 0, p + do j = 0, n + do i = 0, m + xyz_local = [x_cc(i) - center(1), y_cc(j) - center(2), z_cc(l) - center(3)] ! get coordinate frame centered on IB + xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates + + if (xyz_local(3) >= z_min .and. xyz_local(3) <= z_max) then + + if (xyz_local(1) >= 0._wp .and. xyz_local(1) <= ca_in) then + if (xyz_local(2) >= 0._wp) then + k = 1 + do while (airfoil_grid_u(k)%x < xyz_local(1)) + k = k + 1 + end do + if (f_approx_equal(airfoil_grid_u(k)%x, xyz_local(1))) then + if (xyz_local(2) <= airfoil_grid_u(k)%y) then !!IB - ib_markers_sf(i, j, l) = patch_id - end if + ib_markers_sf(i, j, l) = patch_id end if else - k = 1 - do while (airfoil_grid_l(k)%x < xyz_local(1)) - k = k + 1 - end do - if (f_approx_equal(airfoil_grid_l(k)%x, xyz_local(1))) then - if (xyz_local(2) >= airfoil_grid_l(k)%y) then + f = (airfoil_grid_u(k)%x - xyz_local(1))/(airfoil_grid_u(k)%x - airfoil_grid_u(k - 1)%x) + if (xyz_local(2) <= ((1._wp - f)*airfoil_grid_u(k)%y + f*airfoil_grid_u(k - 1)%y)) then !!IB - ib_markers_sf(i, j, l) = patch_id - end if - else - f = (airfoil_grid_l(k)%x - xyz_local(1))/(airfoil_grid_l(k)%x - airfoil_grid_l(k - 1)%x) + ib_markers_sf(i, j, l) = patch_id + end if + end if + else + k = 1 + do while (airfoil_grid_l(k)%x < xyz_local(1)) + k = k + 1 + end do + if (f_approx_equal(airfoil_grid_l(k)%x, xyz_local(1))) then + if (xyz_local(2) >= airfoil_grid_l(k)%y) then + !!IB + ib_markers_sf(i, j, l) = patch_id + end if + else + f = (airfoil_grid_l(k)%x - xyz_local(1))/(airfoil_grid_l(k)%x - airfoil_grid_l(k - 1)%x) - if (xyz_local(2) >= ((1._wp - f)*airfoil_grid_l(k)%y + f*airfoil_grid_l(k - 1)%y)) then + if (xyz_local(2) >= ((1._wp - f)*airfoil_grid_l(k)%y + f*airfoil_grid_l(k - 1)%y)) then !!IB - ib_markers_sf(i, j, l) = patch_id - end if + ib_markers_sf(i, j, l) = patch_id end if end if end if end if - end do + end if end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_ib_3D_airfoil @@ -522,26 +522,26 @@ contains ! domain and verifying whether the current patch has the permission ! to write to that cell. If both queries check out, the primitive ! variables of the current patch are assigned to this cell. - #:call GPU_PARALLEL_LOOP(private='[i,j, xy_local]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j, xy_local]', copy='[ib_markers_sf]',& & copyin='[patch_id,center,length,inverse_rotation,x_cc,y_cc]', collapse=2) - do j = 0, n - do i = 0, m - ! get the x and y coordinates in the local IB frame - xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] - xy_local = matmul(inverse_rotation, xy_local) + do j = 0, n + do i = 0, m + ! get the x and y coordinates in the local IB frame + xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] + xy_local = matmul(inverse_rotation, xy_local) - if (-0.5_wp*length(1) <= xy_local(1) .and. & - 0.5_wp*length(1) >= xy_local(1) .and. & - -0.5_wp*length(2) <= xy_local(2) .and. & - 0.5_wp*length(2) >= xy_local(2)) then + if (-0.5_wp*length(1) <= xy_local(1) .and. & + 0.5_wp*length(1) >= xy_local(1) .and. & + -0.5_wp*length(2) <= xy_local(2) .and. & + 0.5_wp*length(2) >= xy_local(2)) then - ! Updating the patch identities bookkeeping variable - ib_markers_sf(i, j, 0) = patch_id + ! Updating the patch identities bookkeeping variable + ib_markers_sf(i, j, 0) = patch_id - end if - end do + end if end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_ib_rectangle @@ -582,27 +582,27 @@ contains ! and verifying whether the current patch has permission to write to ! that cell. If both queries check out, the primitive variables of ! the current patch are assigned to this cell. - #:call GPU_PARALLEL_LOOP(private='[i,j,k,cart_y,cart_z]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j,k,cart_y,cart_z]', copy='[ib_markers_sf]',& & copyin='[patch_id,center,radius]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - if (grid_geometry == 3) then - call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) - else - cart_y = y_cc(j) - cart_z = z_cc(k) - end if - ! Updating the patch identities bookkeeping variable - if (((x_cc(i) - center(1))**2 & - + (cart_y - center(2))**2 & - + (cart_z - center(3))**2 <= radius**2)) then - ib_markers_sf(i, j, k) = patch_id - end if - end do + do k = 0, p + do j = 0, n + do i = 0, m + if (grid_geometry == 3) then + call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) + else + cart_y = y_cc(j) + cart_z = z_cc(k) + end if + ! Updating the patch identities bookkeeping variable + if (((x_cc(i) - center(1))**2 & + + (cart_y - center(2))**2 & + + (cart_z - center(3))**2 <= radius**2)) then + ib_markers_sf(i, j, k) = patch_id + end if end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_ib_sphere @@ -644,36 +644,36 @@ contains ! and verifying whether the current patch has permission to write to ! to that cell. If both queries check out, the primitive variables ! of the current patch are assigned to this cell. - #:call GPU_PARALLEL_LOOP(private='[i,j,k,xyz_local,cart_y,cart_z]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j,k,xyz_local,cart_y,cart_z]', copy='[ib_markers_sf]',& & copyin='[patch_id,center,length,inverse_rotation]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m + do k = 0, p + do j = 0, n + do i = 0, m - if (grid_geometry == 3) then - ! TODO :: This does not work and is not covered by any tests. This should be fixed - call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) - else - cart_y = y_cc(j) - cart_z = z_cc(k) - end if - xyz_local = [x_cc(i), cart_y, cart_z] - center ! get coordinate frame centered on IB - xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates - - if (-0.5*length(1) <= xyz_local(1) .and. & - 0.5*length(1) >= xyz_local(1) .and. & - -0.5*length(2) <= xyz_local(2) .and. & - 0.5*length(2) >= xyz_local(2) .and. & - -0.5*length(3) <= xyz_local(3) .and. & - 0.5*length(3) >= xyz_local(3)) then - - ! Updating the patch identities bookkeeping variable - ib_markers_sf(i, j, k) = patch_id - end if - end do + if (grid_geometry == 3) then + ! TODO :: This does not work and is not covered by any tests. This should be fixed + call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) + else + cart_y = y_cc(j) + cart_z = z_cc(k) + end if + xyz_local = [x_cc(i), cart_y, cart_z] - center ! get coordinate frame centered on IB + xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates + + if (-0.5*length(1) <= xyz_local(1) .and. & + 0.5*length(1) >= xyz_local(1) .and. & + -0.5*length(2) <= xyz_local(2) .and. & + 0.5*length(2) >= xyz_local(2) .and. & + -0.5*length(3) <= xyz_local(3) .and. & + 0.5*length(3) >= xyz_local(3)) then + + ! Updating the patch identities bookkeeping variable + ib_markers_sf(i, j, k) = patch_id + end if end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_ib_cuboid @@ -719,46 +719,46 @@ contains ! domain and verifying whether the current patch has the permission ! to write to that cell. If both queries check out, the primitive ! variables of the current patch are assigned to this cell. - #:call GPU_PARALLEL_LOOP(private='[i,j,k,xyz_local,cart_y,cart_z]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j,k,xyz_local,cart_y,cart_z]', copy='[ib_markers_sf]',& & copyin='[patch_id,center,length,radius,inverse_rotation]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m + do k = 0, p + do j = 0, n + do i = 0, m - if (grid_geometry == 3) then - call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) - else - cart_y = y_cc(j) - cart_z = z_cc(k) - end if - xyz_local = [x_cc(i), cart_y, cart_z] - center ! get coordinate frame centered on IB - xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates - - if (((.not. f_is_default(length(1)) .and. & - xyz_local(2)**2 & - + xyz_local(3)**2 <= radius**2 .and. & - -0.5_wp*length(1) <= xyz_local(1) .and. & - 0.5_wp*length(1) >= xyz_local(1)) & - .or. & - (.not. f_is_default(length(2)) .and. & - xyz_local(1)**2 & - + xyz_local(3)**2 <= radius**2 .and. & - -0.5_wp*length(2) <= xyz_local(2) .and. & - 0.5_wp*length(2) >= xyz_local(2)) & - .or. & - (.not. f_is_default(length(3)) .and. & - xyz_local(1)**2 & - + xyz_local(2)**2 <= radius**2 .and. & - -0.5_wp*length(3) <= xyz_local(3) .and. & - 0.5_wp*length(3) >= xyz_local(3)))) then - - ! Updating the patch identities bookkeeping variable - ib_markers_sf(i, j, k) = patch_id - end if - end do + if (grid_geometry == 3) then + call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) + else + cart_y = y_cc(j) + cart_z = z_cc(k) + end if + xyz_local = [x_cc(i), cart_y, cart_z] - center ! get coordinate frame centered on IB + xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates + + if (((.not. f_is_default(length(1)) .and. & + xyz_local(2)**2 & + + xyz_local(3)**2 <= radius**2 .and. & + -0.5_wp*length(1) <= xyz_local(1) .and. & + 0.5_wp*length(1) >= xyz_local(1)) & + .or. & + (.not. f_is_default(length(2)) .and. & + xyz_local(1)**2 & + + xyz_local(3)**2 <= radius**2 .and. & + -0.5_wp*length(2) <= xyz_local(2) .and. & + 0.5_wp*length(2) >= xyz_local(2)) & + .or. & + (.not. f_is_default(length(3)) .and. & + xyz_local(1)**2 & + + xyz_local(2)**2 <= radius**2 .and. & + -0.5_wp*length(3) <= xyz_local(3) .and. & + 0.5_wp*length(3) >= xyz_local(3)))) then + + ! Updating the patch identities bookkeeping variable + ib_markers_sf(i, j, k) = patch_id + end if end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_ib_cylinder diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index cbf5040cd6..7852f46de6 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -757,153 +757,153 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, nVar - r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) - buff_send(r) = q_comm(i)%sf(j + pack_offset, k, l) - end do + $:GPU_PARALLEL_LOOP(collapse=4,private='[r,i,j,k,l]') + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = 1, nVar + r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) + buff_send(r) = q_comm(i)%sf(j + pack_offset, k, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = pb_in(j + pack_offset, k, l, i - nVar, q) - end do + $:GPU_PARALLEL_LOOP(collapse=4,private='[r,i,j,k,l]') + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + (j + buff_size*(k + (n + 1)*l)) + buff_send(r) = pb_in(j + pack_offset, k, l, i - nVar, q) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = mv_in(j + pack_offset, k, l, i - nVar, q) - end do + $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + (j + buff_size*(k + (n + 1)*l)) + buff_send(r) = mv_in(j + pack_offset, k, l, i - nVar, q) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if #:elif mpi_dir == 2 - #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') - do i = 1, nVar - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = q_comm(i)%sf(j, k + pack_offset, l) - end do + $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') + do i = 1, nVar + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = q_comm(i)%sf(j, k + pack_offset, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = pb_in(j, k + pack_offset, l, i - nVar, q) - end do + $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = pb_in(j, k + pack_offset, l, i - nVar, q) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP - - #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = mv_in(j, k + pack_offset, l, i - nVar, q) - end do + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = mv_in(j, k + pack_offset, l, i - nVar, q) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if #:else - #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') - do i = 1, nVar - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = q_comm(i)%sf(j, k, l + pack_offset) - end do + $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') + do i = 1, nVar + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = q_comm(i)%sf(j, k, l + pack_offset) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = pb_in(j, k, l + pack_offset, i - nVar, q) - end do + $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') + do i = nVar + 1, nVar + 4 + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = pb_in(j, k, l + pack_offset, i - nVar, q) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP - - #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = mv_in(j, k, l + pack_offset, i - nVar, q) - end do + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') + do i = nVar + 1, nVar + 4 + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = mv_in(j, k, l + pack_offset, i - nVar, q) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if #:endif end if @@ -958,176 +958,176 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, nVar - r = (i - 1) + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - q_comm(i)%sf(j + unpack_offset, k, l) = buff_recv(r) + $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = 1, nVar + r = (i - 1) + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + q_comm(i)%sf(j + unpack_offset, k, l) = buff_recv(r) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if #endif - end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - pb_in(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) - end do + $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + pb_in(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - mv_in(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) - end do + $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + mv_in(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if #:elif mpi_dir == 2 - #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') - do i = 1, nVar - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - q_comm(i)%sf(j, k + unpack_offset, l) = buff_recv(r) + $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') + do i = 1, nVar + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + q_comm(i)%sf(j, k + unpack_offset, l) = buff_recv(r) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if #endif - end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - pb_in(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) - end do + $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + pb_in(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP - - #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - mv_in(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) - end do + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + mv_in(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if #:else ! Unpacking buffer from bc_z%beg - #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') - do i = 1, nVar - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - q_comm(i)%sf(j, k, l + unpack_offset) = buff_recv(r) + $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') + do i = 1, nVar + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + q_comm(i)%sf(j, k, l + unpack_offset) = buff_recv(r) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if #endif - end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - pb_in(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) - end do + $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') + do i = nVar + 1, nVar + 4 + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + pb_in(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP - - #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - mv_in(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) - end do + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') + do i = nVar + 1, nVar + 4 + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + mv_in(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if #:endif end if diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index 82d8f41389..e5f5b02788 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -99,177 +99,177 @@ contains integer :: i, j, k, l ! starting equilibrium solver - #:call GPU_PARALLEL_LOOP(collapse=3, private='[p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok,pS, pSOV, pSSL, TS, TSOV, TSatOV, TSatSL, TSSL, rhoe, dynE, rhos, rho, rM, m1, m2, MCT, TvF]') - do j = 0, m - do k = 0, n - do l = 0, p + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok,pS, pSOV, pSSL, TS, TSOV, TSatOV, TSatSL, TSSL, rhoe, dynE, rhos, rho, rM, m1, m2, MCT, TvF]') + do j = 0, m + do k = 0, n + do l = 0, p - rho = 0.0_wp; TvF = 0.0_wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids + rho = 0.0_wp; TvF = 0.0_wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - ! Mixture density - rho = rho + q_cons_vf(i + contxb - 1)%sf(j, k, l) + ! Mixture density + rho = rho + q_cons_vf(i + contxb - 1)%sf(j, k, l) - ! Total Volume Fraction - TvF = TvF + q_cons_vf(i + advxb - 1)%sf(j, k, l) + ! Total Volume Fraction + TvF = TvF + q_cons_vf(i + advxb - 1)%sf(j, k, l) - end do + end do - ! calculating the total reacting mass for the phase change process. By hypothesis, this should not change - ! throughout the phase-change process. - rM = q_cons_vf(lp + contxb - 1)%sf(j, k, l) + q_cons_vf(vp + contxb - 1)%sf(j, k, l) + ! calculating the total reacting mass for the phase change process. By hypothesis, this should not change + ! throughout the phase-change process. + rM = q_cons_vf(lp + contxb - 1)%sf(j, k, l) + q_cons_vf(vp + contxb - 1)%sf(j, k, l) - ! correcting negative (reacting) mass fraction values in case they happen - call s_correct_partial_densities(MCT, q_cons_vf, rM, j, k, l) + ! correcting negative (reacting) mass fraction values in case they happen + call s_correct_partial_densities(MCT, q_cons_vf, rM, j, k, l) - ! fixing m1 and m2 AFTER correcting the partial densities. Note that these values must be stored for the phase - ! change process that will happen a posteriori - m1 = q_cons_vf(lp + contxb - 1)%sf(j, k, l) + ! fixing m1 and m2 AFTER correcting the partial densities. Note that these values must be stored for the phase + ! change process that will happen a posteriori + m1 = q_cons_vf(lp + contxb - 1)%sf(j, k, l) - m2 = q_cons_vf(vp + contxb - 1)%sf(j, k, l) + m2 = q_cons_vf(vp + contxb - 1)%sf(j, k, l) - ! kinetic energy as an auxiliary variable to the calculation of the total internal energy - dynE = 0.0_wp - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, momxe + ! kinetic energy as an auxiliary variable to the calculation of the total internal energy + dynE = 0.0_wp + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, momxe - dynE = dynE + 5.0e-1_wp*q_cons_vf(i)%sf(j, k, l)**2/rho + dynE = dynE + 5.0e-1_wp*q_cons_vf(i)%sf(j, k, l)**2/rho - end do + end do - ! calculating the total energy that MUST be preserved throughout the pT- and pTg-relaxation procedures - ! at each of the cells. The internal energy is calculated as the total energy minus the kinetic - ! energy to preserved its value at sharp interfaces - rhoe = q_cons_vf(E_idx)%sf(j, k, l) - dynE + ! calculating the total energy that MUST be preserved throughout the pT- and pTg-relaxation procedures + ! at each of the cells. The internal energy is calculated as the total energy minus the kinetic + ! energy to preserved its value at sharp interfaces + rhoe = q_cons_vf(E_idx)%sf(j, k, l) - dynE - ! Calling pT-equilibrium for either finishing phase-change module, or as an IC for the pTg-equilibrium - ! for this case, MFL cannot be either 0 or 1, so I chose it to be 2 - call s_infinite_pt_relaxation_k(j, k, l, 2, pS, p_infpT, q_cons_vf, rhoe, TS) + ! Calling pT-equilibrium for either finishing phase-change module, or as an IC for the pTg-equilibrium + ! for this case, MFL cannot be either 0 or 1, so I chose it to be 2 + call s_infinite_pt_relaxation_k(j, k, l, 2, pS, p_infpT, q_cons_vf, rhoe, TS) - ! check if pTg-equilibrium is required - ! NOTE that NOTHING else needs to be updated OTHER than the individual partial densities - ! given the outputs from the pT- and pTg-equilibrium solvers are just p and one of the partial masses - ! (pTg- case) - if ((relax_model == 6) .and. ((q_cons_vf(lp + contxb - 1)%sf(j, k, l) > mixM*rM) & - .and. (q_cons_vf(vp + contxb - 1)%sf(j, k, l) > mixM*rM)) & - .and. (pS < pCr) .and. (TS < TCr)) then + ! check if pTg-equilibrium is required + ! NOTE that NOTHING else needs to be updated OTHER than the individual partial densities + ! given the outputs from the pT- and pTg-equilibrium solvers are just p and one of the partial masses + ! (pTg- case) + if ((relax_model == 6) .and. ((q_cons_vf(lp + contxb - 1)%sf(j, k, l) > mixM*rM) & + .and. (q_cons_vf(vp + contxb - 1)%sf(j, k, l) > mixM*rM)) & + .and. (pS < pCr) .and. (TS < TCr)) then - ! Checking if phase change is needed, by checking whether the final solution is either subcoooled - ! liquid or overheated vapor. + ! Checking if phase change is needed, by checking whether the final solution is either subcoooled + ! liquid or overheated vapor. - ! overheated vapor case - ! depleting the mass of liquid - q_cons_vf(lp + contxb - 1)%sf(j, k, l) = mixM*rM + ! overheated vapor case + ! depleting the mass of liquid + q_cons_vf(lp + contxb - 1)%sf(j, k, l) = mixM*rM - ! transferring the total mass to vapor - q_cons_vf(vp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM + ! transferring the total mass to vapor + q_cons_vf(vp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM - ! calling pT-equilibrium for overheated vapor, which is MFL = 0 - call s_infinite_pt_relaxation_k(j, k, l, 0, pSOV, p_infOV, q_cons_vf, rhoe, TSOV) + ! calling pT-equilibrium for overheated vapor, which is MFL = 0 + call s_infinite_pt_relaxation_k(j, k, l, 0, pSOV, p_infOV, q_cons_vf, rhoe, TSOV) - ! calculating Saturation temperature - call s_TSat(pSOV, TSatOV, TSOV) + ! calculating Saturation temperature + call s_TSat(pSOV, TSatOV, TSOV) - ! subcooled liquid case - ! transferring the total mass to liquid - q_cons_vf(lp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM - - ! depleting the mass of vapor - q_cons_vf(vp + contxb - 1)%sf(j, k, l) = mixM*rM + ! subcooled liquid case + ! transferring the total mass to liquid + q_cons_vf(lp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM - ! calling pT-equilibrium for subcooled liquid, which is MFL = 1 - call s_infinite_pt_relaxation_k(j, k, l, 1, pSSL, p_infSL, q_cons_vf, rhoe, TSSL) + ! depleting the mass of vapor + q_cons_vf(vp + contxb - 1)%sf(j, k, l) = mixM*rM - ! calculating Saturation temperature - call s_TSat(pSSL, TSatSL, TSSL) + ! calling pT-equilibrium for subcooled liquid, which is MFL = 1 + call s_infinite_pt_relaxation_k(j, k, l, 1, pSSL, p_infSL, q_cons_vf, rhoe, TSSL) - ! checking the conditions for overheated vapor and subcooled liquide - if (TSOV > TSatOV) then + ! calculating Saturation temperature + call s_TSat(pSSL, TSatSL, TSSL) - ! Assigning pressure - pS = pSOV + ! checking the conditions for overheated vapor and subcooled liquide + if (TSOV > TSatOV) then - ! Assigning Temperature - TS = TSOV + ! Assigning pressure + pS = pSOV - ! correcting the liquid partial density - q_cons_vf(lp + contxb - 1)%sf(j, k, l) = mixM*rM + ! Assigning Temperature + TS = TSOV - ! correcting the vapor partial density - q_cons_vf(vp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM + ! correcting the liquid partial density + q_cons_vf(lp + contxb - 1)%sf(j, k, l) = mixM*rM - elseif (TSSL < TSatSL) then + ! correcting the vapor partial density + q_cons_vf(vp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM - ! Assigning pressure - pS = pSSL + elseif (TSSL < TSatSL) then - ! Assigning Temperature - TS = TSSL + ! Assigning pressure + pS = pSSL - ! correcting the liquid partial density - q_cons_vf(lp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM + ! Assigning Temperature + TS = TSSL - ! correcting the vapor partial density - q_cons_vf(vp + contxb - 1)%sf(j, k, l) = mixM*rM + ! correcting the liquid partial density + q_cons_vf(lp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM - else + ! correcting the vapor partial density + q_cons_vf(vp + contxb - 1)%sf(j, k, l) = mixM*rM - ! returning partial pressures to what they were from the homogeneous solver - ! liquid - q_cons_vf(lp + contxb - 1)%sf(j, k, l) = m1 + else - ! vapor - q_cons_vf(vp + contxb - 1)%sf(j, k, l) = m2 + ! returning partial pressures to what they were from the homogeneous solver + ! liquid + q_cons_vf(lp + contxb - 1)%sf(j, k, l) = m1 - ! calling the pTg-equilibrium solver - call s_infinite_ptg_relaxation_k(j, k, l, pS, p_infpT, rhoe, q_cons_vf, TS) + ! vapor + q_cons_vf(vp + contxb - 1)%sf(j, k, l) = m2 - end if + ! calling the pTg-equilibrium solver + call s_infinite_ptg_relaxation_k(j, k, l, pS, p_infpT, rhoe, q_cons_vf, TS) end if - ! Calculations AFTER equilibrium + end if + + ! Calculations AFTER equilibrium - ! entropy - sk(1:num_fluids) = cvs(1:num_fluids)*log((TS**gs_min(1:num_fluids)) & - /((pS + ps_inf(1:num_fluids))**(gs_min(1:num_fluids) - 1.0_wp))) + qvps(1:num_fluids) + ! entropy + sk(1:num_fluids) = cvs(1:num_fluids)*log((TS**gs_min(1:num_fluids)) & + /((pS + ps_inf(1:num_fluids))**(gs_min(1:num_fluids) - 1.0_wp))) + qvps(1:num_fluids) - ! enthalpy - hk(1:num_fluids) = gs_min(1:num_fluids)*cvs(1:num_fluids)*TS & - + qvs(1:num_fluids) + ! enthalpy + hk(1:num_fluids) = gs_min(1:num_fluids)*cvs(1:num_fluids)*TS & + + qvs(1:num_fluids) - ! Gibbs-free energy - gk(1:num_fluids) = hk(1:num_fluids) - TS*sk(1:num_fluids) + ! Gibbs-free energy + gk(1:num_fluids) = hk(1:num_fluids) - TS*sk(1:num_fluids) - ! densities - rhok(1:num_fluids) = (pS + ps_inf(1:num_fluids)) & - /((gs_min(1:num_fluids) - 1)*cvs(1:num_fluids)*TS) + ! densities + rhok(1:num_fluids) = (pS + ps_inf(1:num_fluids)) & + /((gs_min(1:num_fluids) - 1)*cvs(1:num_fluids)*TS) - ! internal energy - ek(1:num_fluids) = (pS + gs_min(1:num_fluids) & - *ps_inf(1:num_fluids))/(pS + ps_inf(1:num_fluids)) & - *cvs(1:num_fluids)*TS + qvs(1:num_fluids) + ! internal energy + ek(1:num_fluids) = (pS + gs_min(1:num_fluids) & + *ps_inf(1:num_fluids))/(pS + ps_inf(1:num_fluids)) & + *cvs(1:num_fluids)*TS + qvs(1:num_fluids) - ! calculating volume fractions, internal energies, and total entropy - rhos = 0.0_wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids + ! calculating volume fractions, internal energies, and total entropy + rhos = 0.0_wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - ! volume fractions - q_cons_vf(i + advxb - 1)%sf(j, k, l) = q_cons_vf(i + contxb - 1)%sf(j, k, l)/rhok(i) + ! volume fractions + q_cons_vf(i + advxb - 1)%sf(j, k, l) = q_cons_vf(i + contxb - 1)%sf(j, k, l)/rhok(i) - ! alpha*rho*e - q_cons_vf(i + intxb - 1)%sf(j, k, l) = q_cons_vf(i + contxb - 1)%sf(j, k, l)*ek(i) + ! alpha*rho*e + q_cons_vf(i + intxb - 1)%sf(j, k, l) = q_cons_vf(i + contxb - 1)%sf(j, k, l)*ek(i) - ! Total entropy - rhos = rhos + q_cons_vf(i + contxb - 1)%sf(j, k, l)*sk(i) + ! Total entropy + rhos = rhos + q_cons_vf(i + contxb - 1)%sf(j, k, l)*sk(i) - end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_infinite_relaxation_k diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 4d4bf60fa3..dd9bedb6c3 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -873,299 +873,299 @@ contains end if #:endif - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_K, alpha_rho_K, Re_K, nRtmp, rho_K, gamma_K, pi_inf_K,qv_K, dyn_pres_K, rhoYks, B, T]') - do l = ibounds(3)%beg, ibounds(3)%end - do k = ibounds(2)%beg, ibounds(2)%end - do j = ibounds(1)%beg, ibounds(1)%end - dyn_pres_K = 0._wp - - if (igr) then - if (num_fluids == 1) then - alpha_rho_K(1) = qK_cons_vf(contxb)%sf(j, k, l) - alpha_K(1) = 1._wp - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_rho_K(i) = qK_cons_vf(i)%sf(j, k, l) - alpha_K(i) = qK_cons_vf(advxb + i - 1)%sf(j, k, l) - end do - - alpha_rho_K(num_fluids) = qK_cons_vf(num_fluids)%sf(j, k, l) - alpha_K(num_fluids) = 1._wp - sum(alpha_K(1:num_fluids - 1)) - end if + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,alpha_K, alpha_rho_K, Re_K, nRtmp, rho_K, gamma_K, pi_inf_K,qv_K, dyn_pres_K, rhoYks, B, T]') + do l = ibounds(3)%beg, ibounds(3)%end + do k = ibounds(2)%beg, ibounds(2)%end + do j = ibounds(1)%beg, ibounds(1)%end + dyn_pres_K = 0._wp + + if (igr) then + if (num_fluids == 1) then + alpha_rho_K(1) = qK_cons_vf(contxb)%sf(j, k, l) + alpha_K(1) = 1._wp else $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids + do i = 1, num_fluids - 1 alpha_rho_K(i) = qK_cons_vf(i)%sf(j, k, l) alpha_K(i) = qK_cons_vf(advxb + i - 1)%sf(j, k, l) end do + + alpha_rho_K(num_fluids) = qK_cons_vf(num_fluids)%sf(j, k, l) + alpha_K(num_fluids) = 1._wp - sum(alpha_K(1:num_fluids - 1)) end if + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_K(i) = qK_cons_vf(i)%sf(j, k, l) + alpha_K(i) = qK_cons_vf(advxb + i - 1)%sf(j, k, l) + end do + end if - if (model_eqns /= 4) then + if (model_eqns /= 4) then #ifdef MFC_SIMULATION - ! If in simulation, use acc mixture subroutines - if (elasticity) then - call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, alpha_K, & - alpha_rho_K, Re_K, G_K, Gs_vc) - else if (bubbles_euler) then - call s_convert_species_to_mixture_variables_bubbles_acc(rho_K, gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K) - else - call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K) - end if + ! If in simulation, use acc mixture subroutines + if (elasticity) then + call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, alpha_K, & + alpha_rho_K, Re_K, G_K, Gs_vc) + else if (bubbles_euler) then + call s_convert_species_to_mixture_variables_bubbles_acc(rho_K, gamma_K, pi_inf_K, qv_K, & + alpha_K, alpha_rho_K, Re_K) + else + call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & + alpha_K, alpha_rho_K, Re_K) + end if #else - ! If pre-processing, use non acc mixture subroutines - if (elasticity) then - call s_convert_to_mixture_variables(qK_cons_vf, j, k, l, & - rho_K, gamma_K, pi_inf_K, qv_K, Re_K, G_K, fluid_pp(:)%G) - else - call s_convert_to_mixture_variables(qK_cons_vf, j, k, l, & - rho_K, gamma_K, pi_inf_K, qv_K) - end if -#endif + ! If pre-processing, use non acc mixture subroutines + if (elasticity) then + call s_convert_to_mixture_variables(qK_cons_vf, j, k, l, & + rho_K, gamma_K, pi_inf_K, qv_K, Re_K, G_K, fluid_pp(:)%G) + else + call s_convert_to_mixture_variables(qK_cons_vf, j, k, l, & + rho_K, gamma_K, pi_inf_K, qv_K) end if +#endif + end if - if (relativity) then - if (n == 0) then - B(1) = Bx0 - B(2) = qK_cons_vf(B_idx%beg)%sf(j, k, l) - B(3) = qK_cons_vf(B_idx%beg + 1)%sf(j, k, l) - else - B(1) = qK_cons_vf(B_idx%beg)%sf(j, k, l) - B(2) = qK_cons_vf(B_idx%beg + 1)%sf(j, k, l) - B(3) = qK_cons_vf(B_idx%beg + 2)%sf(j, k, l) - end if - B2 = B(1)**2 + B(2)**2 + B(3)**2 - - m2 = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, momxe - m2 = m2 + qK_cons_vf(i)%sf(j, k, l)**2 - end do + if (relativity) then + if (n == 0) then + B(1) = Bx0 + B(2) = qK_cons_vf(B_idx%beg)%sf(j, k, l) + B(3) = qK_cons_vf(B_idx%beg + 1)%sf(j, k, l) + else + B(1) = qK_cons_vf(B_idx%beg)%sf(j, k, l) + B(2) = qK_cons_vf(B_idx%beg + 1)%sf(j, k, l) + B(3) = qK_cons_vf(B_idx%beg + 2)%sf(j, k, l) + end if + B2 = B(1)**2 + B(2)**2 + B(3)**2 - S = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 3 - S = S + qK_cons_vf(momxb + i - 1)%sf(j, k, l)*B(i) - end do + m2 = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, momxe + m2 = m2 + qK_cons_vf(i)%sf(j, k, l)**2 + end do - E = qK_cons_vf(E_idx)%sf(j, k, l) + S = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 3 + S = S + qK_cons_vf(momxb + i - 1)%sf(j, k, l)*B(i) + end do - D = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - D = D + qK_cons_vf(i)%sf(j, k, l) - end do + E = qK_cons_vf(E_idx)%sf(j, k, l) - ! Newton-Raphson - W = E + D - $:GPU_LOOP(parallelism='[seq]') - do iter = 1, relativity_cons_to_prim_max_iter - Ga = (W + B2)*W/sqrt((W + B2)**2*W**2 - (m2*W**2 + S**2*(2*W + B2))) - pres = (W - D*Ga)/((gamma_K + 1)*Ga**2) ! Thermal pressure from EOS - f = W - pres + (1 - 1/(2*Ga**2))*B2 - S**2/(2*W**2) - E - D - - ! The first equation below corrects a typo in (Mignone & Bodo, 2006) - ! m2*W**2 → 2*m2*W**2, which would cancel with the 2* in other terms - ! This corrected version is not used as the second equation empirically converges faster. - ! First equation is kept for further investigation. - ! dGa_dW = -Ga**3 * ( S**2*(3*W**2+3*W*B2+B2**2) + m2*W**2 ) / (W**3 * (W+B2)**3) ! first (corrected) - dGa_dW = -Ga**3*(2*S**2*(3*W**2 + 3*W*B2 + B2**2) + m2*W**2)/(2*W**3*(W + B2)**3) ! second (in paper) - - dp_dW = (Ga*(1 + D*dGa_dW) - 2*W*dGa_dW)/((gamma_K + 1)*Ga**3) - df_dW = 1 - dp_dW + (B2/Ga**3)*dGa_dW + S**2/W**3 - - dW = -f/df_dW - W = W + dW - if (abs(dW) < 1.e-12_wp*W) exit - end do + D = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + D = D + qK_cons_vf(i)%sf(j, k, l) + end do - ! Recalculate pressure using converged W + ! Newton-Raphson + W = E + D + $:GPU_LOOP(parallelism='[seq]') + do iter = 1, relativity_cons_to_prim_max_iter Ga = (W + B2)*W/sqrt((W + B2)**2*W**2 - (m2*W**2 + S**2*(2*W + B2))) - qK_prim_vf(E_idx)%sf(j, k, l) = (W - D*Ga)/((gamma_K + 1)*Ga**2) + pres = (W - D*Ga)/((gamma_K + 1)*Ga**2) ! Thermal pressure from EOS + f = W - pres + (1 - 1/(2*Ga**2))*B2 - S**2/(2*W**2) - E - D + + ! The first equation below corrects a typo in (Mignone & Bodo, 2006) + ! m2*W**2 → 2*m2*W**2, which would cancel with the 2* in other terms + ! This corrected version is not used as the second equation empirically converges faster. + ! First equation is kept for further investigation. + ! dGa_dW = -Ga**3 * ( S**2*(3*W**2+3*W*B2+B2**2) + m2*W**2 ) / (W**3 * (W+B2)**3) ! first (corrected) + dGa_dW = -Ga**3*(2*S**2*(3*W**2 + 3*W*B2 + B2**2) + m2*W**2)/(2*W**3*(W + B2)**3) ! second (in paper) + + dp_dW = (Ga*(1 + D*dGa_dW) - 2*W*dGa_dW)/((gamma_K + 1)*Ga**3) + df_dW = 1 - dp_dW + (B2/Ga**3)*dGa_dW + S**2/W**3 + + dW = -f/df_dW + W = W + dW + if (abs(dW) < 1.e-12_wp*W) exit + end do - ! Recover the other primitive variables - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 3 - qK_prim_vf(momxb + i - 1)%sf(j, k, l) = (qK_cons_vf(momxb + i - 1)%sf(j, k, l) + (S/W)*B(i))/(W + B2) - end do - qK_prim_vf(1)%sf(j, k, l) = D/Ga ! Hard-coded for single-component for now + ! Recalculate pressure using converged W + Ga = (W + B2)*W/sqrt((W + B2)**2*W**2 - (m2*W**2 + S**2*(2*W + B2))) + qK_prim_vf(E_idx)%sf(j, k, l) = (W - D*Ga)/((gamma_K + 1)*Ga**2) - $:GPU_LOOP(parallelism='[seq]') - do i = B_idx%beg, B_idx%end - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) - end do + ! Recover the other primitive variables + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 3 + qK_prim_vf(momxb + i - 1)%sf(j, k, l) = (qK_cons_vf(momxb + i - 1)%sf(j, k, l) + (S/W)*B(i))/(W + B2) + end do + qK_prim_vf(1)%sf(j, k, l) = D/Ga ! Hard-coded for single-component for now - cycle ! skip all the non-relativistic conversions below - end if + $:GPU_LOOP(parallelism='[seq]') + do i = B_idx%beg, B_idx%end + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) + end do - if (chemistry) then - rho_K = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - rho_K = rho_K + max(0._wp, qK_cons_vf(i)%sf(j, k, l)) - end do + cycle ! skip all the non-relativistic conversions below + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - qK_prim_vf(i)%sf(j, k, l) = rho_K - end do + if (chemistry) then + rho_K = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + rho_K = rho_K + max(0._wp, qK_cons_vf(i)%sf(j, k, l)) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - qK_prim_vf(i)%sf(j, k, l) = max(0._wp, qK_cons_vf(i)%sf(j, k, l)/rho_K) - end do - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) - end do - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + qK_prim_vf(i)%sf(j, k, l) = rho_K + end do + + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + qK_prim_vf(i)%sf(j, k, l) = max(0._wp, qK_cons_vf(i)%sf(j, k, l)/rho_K) + end do + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) + end do + end if #ifdef MFC_SIMULATION - rho_K = max(rho_K, sgm_eps) + rho_K = max(rho_K, sgm_eps) #endif + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, momxe + if (model_eqns /= 4) then + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) & + /rho_K + dyn_pres_K = dyn_pres_K + 5.e-1_wp*qK_cons_vf(i)%sf(j, k, l) & + *qK_prim_vf(i)%sf(j, k, l) + else + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) & + /qK_cons_vf(1)%sf(j, k, l) + end if + end do + + if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = momxb, momxe - if (model_eqns /= 4) then - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) & - /rho_K - dyn_pres_K = dyn_pres_K + 5.e-1_wp*qK_cons_vf(i)%sf(j, k, l) & - *qK_prim_vf(i)%sf(j, k, l) - else - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) & - /qK_cons_vf(1)%sf(j, k, l) - end if + do i = 1, num_species + rhoYks(i) = qK_cons_vf(chemxb + i - 1)%sf(j, k, l) end do - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - rhoYks(i) = qK_cons_vf(chemxb + i - 1)%sf(j, k, l) - end do - - T = q_T_sf%sf(j, k, l) - end if + T = q_T_sf%sf(j, k, l) + end if - if (mhd) then - if (n == 0) then - pres_mag = 0.5_wp*(Bx0**2 + qK_cons_vf(B_idx%beg)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 1)%sf(j, k, l)**2) - else - pres_mag = 0.5_wp*(qK_cons_vf(B_idx%beg)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 1)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 2)%sf(j, k, l)**2) - end if + if (mhd) then + if (n == 0) then + pres_mag = 0.5_wp*(Bx0**2 + qK_cons_vf(B_idx%beg)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 1)%sf(j, k, l)**2) else - pres_mag = 0._wp + pres_mag = 0.5_wp*(qK_cons_vf(B_idx%beg)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 1)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 2)%sf(j, k, l)**2) end if + else + pres_mag = 0._wp + end if - call s_compute_pressure(qK_cons_vf(E_idx)%sf(j, k, l), & - qK_cons_vf(alf_idx)%sf(j, k, l), & - dyn_pres_K, pi_inf_K, gamma_K, rho_K, & - qv_K, rhoYks, pres, T, pres_mag=pres_mag) + call s_compute_pressure(qK_cons_vf(E_idx)%sf(j, k, l), & + qK_cons_vf(alf_idx)%sf(j, k, l), & + dyn_pres_K, pi_inf_K, gamma_K, rho_K, & + qv_K, rhoYks, pres, T, pres_mag=pres_mag) - qK_prim_vf(E_idx)%sf(j, k, l) = pres + qK_prim_vf(E_idx)%sf(j, k, l) = pres - if (chemistry) then - q_T_sf%sf(j, k, l) = T - end if + if (chemistry) then + q_T_sf%sf(j, k, l) = T + end if - if (bubbles_euler) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, nb - nRtmp(i) = qK_cons_vf(bubrs_vc(i))%sf(j, k, l) - end do + if (bubbles_euler) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nb + nRtmp(i) = qK_cons_vf(bubrs_vc(i))%sf(j, k, l) + end do - vftmp = qK_cons_vf(alf_idx)%sf(j, k, l) + vftmp = qK_cons_vf(alf_idx)%sf(j, k, l) - if (qbmm) then - !Get nb (constant across all R0 bins) - nbub_sc = qK_cons_vf(bubxb)%sf(j, k, l) + if (qbmm) then + !Get nb (constant across all R0 bins) + nbub_sc = qK_cons_vf(bubxb)%sf(j, k, l) - !Convert cons to prim - $:GPU_LOOP(parallelism='[seq]') - do i = bubxb, bubxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc - end do - !Need to keep track of nb in the primitive variable list (converted back to true value before output) + !Convert cons to prim + $:GPU_LOOP(parallelism='[seq]') + do i = bubxb, bubxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc + end do + !Need to keep track of nb in the primitive variable list (converted back to true value before output) #ifdef MFC_SIMULATION - qK_prim_vf(bubxb)%sf(j, k, l) = qK_cons_vf(bubxb)%sf(j, k, l) + qK_prim_vf(bubxb)%sf(j, k, l) = qK_cons_vf(bubxb)%sf(j, k, l) #endif + else + if (adv_n) then + qK_prim_vf(n_idx)%sf(j, k, l) = qK_cons_vf(n_idx)%sf(j, k, l) + nbub_sc = qK_prim_vf(n_idx)%sf(j, k, l) else - if (adv_n) then - qK_prim_vf(n_idx)%sf(j, k, l) = qK_cons_vf(n_idx)%sf(j, k, l) - nbub_sc = qK_prim_vf(n_idx)%sf(j, k, l) - else - call s_comp_n_from_cons(vftmp, nRtmp, nbub_sc, weight) - end if - - $:GPU_LOOP(parallelism='[seq]') - do i = bubxb, bubxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc - end do + call s_comp_n_from_cons(vftmp, nRtmp, nbub_sc, weight) end if - end if - if (mhd) then $:GPU_LOOP(parallelism='[seq]') - do i = B_idx%beg, B_idx%end - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) + do i = bubxb, bubxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc end do end if + end if - if (elasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = strxb, strxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K - end do - end if + if (mhd) then + $:GPU_LOOP(parallelism='[seq]') + do i = B_idx%beg, B_idx%end + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) + end do + end if - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = strxb, strxe - ! subtracting elastic contribution for pressure calculation - if (G_K > verysmall) then - if (cont_damage) G_K = G_K*max((1._wp - qK_cons_vf(damage_idx)%sf(j, k, l)), 0._wp) + if (elasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = strxb, strxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K + end do + end if + + if (hypoelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = strxb, strxe + ! subtracting elastic contribution for pressure calculation + if (G_K > verysmall) then + if (cont_damage) G_K = G_K*max((1._wp - qK_cons_vf(damage_idx)%sf(j, k, l)), 0._wp) + qK_prim_vf(E_idx)%sf(j, k, l) = qK_prim_vf(E_idx)%sf(j, k, l) - & + ((qK_prim_vf(i)%sf(j, k, l)**2._wp)/(4._wp*G_K))/gamma_K + ! Double for shear stresses + if (any(i == shear_indices)) then qK_prim_vf(E_idx)%sf(j, k, l) = qK_prim_vf(E_idx)%sf(j, k, l) - & ((qK_prim_vf(i)%sf(j, k, l)**2._wp)/(4._wp*G_K))/gamma_K - ! Double for shear stresses - if (any(i == shear_indices)) then - qK_prim_vf(E_idx)%sf(j, k, l) = qK_prim_vf(E_idx)%sf(j, k, l) - & - ((qK_prim_vf(i)%sf(j, k, l)**2._wp)/(4._wp*G_K))/gamma_K - end if end if - end do - end if + end if + end do + end if - if (hyperelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = xibeg, xiend - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K - end do - end if + if (hyperelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = xibeg, xiend + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K + end do + end if - if (.not. igr .or. num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) - end do - end if + if (.not. igr .or. num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) + end do + end if - if (surface_tension) then - qK_prim_vf(c_idx)%sf(j, k, l) = qK_cons_vf(c_idx)%sf(j, k, l) - end if + if (surface_tension) then + qK_prim_vf(c_idx)%sf(j, k, l) = qK_cons_vf(c_idx)%sf(j, k, l) + end if - if (cont_damage) qK_prim_vf(damage_idx)%sf(j, k, l) = qK_cons_vf(damage_idx)%sf(j, k, l) + if (cont_damage) qK_prim_vf(damage_idx)%sf(j, k, l) = qK_cons_vf(damage_idx)%sf(j, k, l) #ifdef MFC_POST_PROCESS - if (bubbles_lagrange) qK_prim_vf(beta_idx)%sf(j, k, l) = qK_cons_vf(beta_idx)%sf(j, k, l) + if (bubbles_lagrange) qK_prim_vf(beta_idx)%sf(j, k, l) = qK_cons_vf(beta_idx)%sf(j, k, l) #endif - end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_convert_conservative_to_primitive_variables @@ -1492,113 +1492,113 @@ contains ! Computing the flux variables from the primitive variables, without ! accounting for the contribution of either viscosity or capillarity #ifdef MFC_SIMULATION - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_K, vel_K, alpha_K, Re_K, Y_K]') - do l = is3b, is3e - do k = is2b, is2e - do j = is1b, is1e + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,alpha_rho_K, vel_K, alpha_K, Re_K, Y_K]') + do l = is3b, is3e + do k = is2b, is2e + do j = is1b, is1e - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - alpha_rho_K(i) = qK_prim_vf(j, k, l, i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + alpha_rho_K(i) = qK_prim_vf(j, k, l, i) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - alpha_K(i - E_idx) = qK_prim_vf(j, k, l, i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - vel_K(i) = qK_prim_vf(j, k, l, contxe + i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + alpha_K(i - E_idx) = qK_prim_vf(j, k, l, i) + end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + vel_K(i) = qK_prim_vf(j, k, l, contxe + i) + end do + + vel_K_sum = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + vel_K_sum = vel_K_sum + vel_K(i)**2._wp + end do - vel_K_sum = 0._wp + pres_K = qK_prim_vf(j, k, l, E_idx) + if (elasticity) then + call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & + alpha_K, alpha_rho_K, Re_K, & + G_K, Gs_vc) + else if (bubbles_euler) then + call s_convert_species_to_mixture_variables_bubbles_acc(rho_K, gamma_K, & + pi_inf_K, qv_K, alpha_K, alpha_rho_K, Re_K) + else + call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & + alpha_K, alpha_rho_K, Re_K) + end if + + ! Computing the energy from the pressure + + if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - vel_K_sum = vel_K_sum + vel_K(i)**2._wp + do i = chemxb, chemxe + Y_K(i - chemxb + 1) = qK_prim_vf(j, k, l, i) end do + !Computing the energy from the internal energy of the mixture + call get_mixture_molecular_weight(Y_k, mix_mol_weight) + R_gas = gas_constant/mix_mol_weight + T_K = pres_K/rho_K/R_gas + call get_mixture_energy_mass(T_K, Y_K, E_K) + E_K = rho_K*E_K + 5.e-1_wp*rho_K*vel_K_sum + else + ! Computing the energy from the pressure + E_K = gamma_K*pres_K + pi_inf_K & + + 5.e-1_wp*rho_K*vel_K_sum + qv_K + end if - pres_K = qK_prim_vf(j, k, l, E_idx) - if (elasticity) then - call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K, & - G_K, Gs_vc) - else if (bubbles_euler) then - call s_convert_species_to_mixture_variables_bubbles_acc(rho_K, gamma_K, & - pi_inf_K, qv_K, alpha_K, alpha_rho_K, Re_K) - else - call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K) - end if + ! mass flux, this should be \alpha_i \rho_i u_i + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + FK_vf(j, k, l, i) = alpha_rho_K(i)*vel_K(dir_idx(1)) + end do - ! Computing the energy from the pressure + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + FK_vf(j, k, l, contxe + dir_idx(i)) = & + rho_K*vel_K(dir_idx(1)) & + *vel_K(dir_idx(i)) & + + pres_K*dir_flg(dir_idx(i)) + end do - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Y_K(i - chemxb + 1) = qK_prim_vf(j, k, l, i) - end do - !Computing the energy from the internal energy of the mixture - call get_mixture_molecular_weight(Y_k, mix_mol_weight) - R_gas = gas_constant/mix_mol_weight - T_K = pres_K/rho_K/R_gas - call get_mixture_energy_mass(T_K, Y_K, E_K) - E_K = rho_K*E_K + 5.e-1_wp*rho_K*vel_K_sum - else - ! Computing the energy from the pressure - E_K = gamma_K*pres_K + pi_inf_K & - + 5.e-1_wp*rho_K*vel_K_sum + qv_K - end if + ! energy flux, u(E+p) + FK_vf(j, k, l, E_idx) = vel_K(dir_idx(1))*(E_K + pres_K) - ! mass flux, this should be \alpha_i \rho_i u_i + ! Species advection Flux, \rho*u*Y + if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - FK_vf(j, k, l, i) = alpha_rho_K(i)*vel_K(dir_idx(1)) + do i = 1, num_species + FK_vf(j, k, l, i - 1 + chemxb) = vel_K(dir_idx(1))*(rho_K*Y_K(i)) end do + end if + if (riemann_solver == 1 .or. riemann_solver == 4) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - FK_vf(j, k, l, contxe + dir_idx(i)) = & - rho_K*vel_K(dir_idx(1)) & - *vel_K(dir_idx(i)) & - + pres_K*dir_flg(dir_idx(i)) + do i = advxb, advxe + FK_vf(j, k, l, i) = 0._wp + FK_src_vf(j, k, l, i) = alpha_K(i - E_idx) end do - ! energy flux, u(E+p) - FK_vf(j, k, l, E_idx) = vel_K(dir_idx(1))*(E_K + pres_K) - - ! Species advection Flux, \rho*u*Y - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - FK_vf(j, k, l, i - 1 + chemxb) = vel_K(dir_idx(1))*(rho_K*Y_K(i)) - end do - end if - - if (riemann_solver == 1 .or. riemann_solver == 4) then - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - FK_vf(j, k, l, i) = 0._wp - FK_src_vf(j, k, l, i) = alpha_K(i - E_idx) - end do - - else - ! Could be bubbles_euler! - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - FK_vf(j, k, l, i) = vel_K(dir_idx(1))*alpha_K(i - E_idx) - end do + else + ! Could be bubbles_euler! + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + FK_vf(j, k, l, i) = vel_K(dir_idx(1))*alpha_K(i - E_idx) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - FK_src_vf(j, k, l, i) = vel_K(dir_idx(1)) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + FK_src_vf(j, k, l, i) = vel_K(dir_idx(1)) + end do - end if + end if - end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() #endif end subroutine s_convert_primitive_to_flux_variables diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index 3da61d2e46..dae9e6ad40 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -166,19 +166,19 @@ contains sim_time = t_step*dt - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - mass_src(j, k, l) = 0._wp - mom_src(1, j, k, l) = 0._wp - e_src(j, k, l) = 0._wp - if (n > 0) mom_src(2, j, k, l) = 0._wp - if (p > 0) mom_src(3, j, k, l) = 0._wp - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + mass_src(j, k, l) = 0._wp + mom_src(1, j, k, l) = 0._wp + e_src(j, k, l) = 0._wp + if (n > 0) mom_src(2, j, k, l) = 0._wp + if (p > 0) mom_src(3, j, k, l) = 0._wp end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() ! Keep outer loop sequel because different sources can have very different number of points do ai = 1, num_source @@ -220,125 +220,125 @@ contains deallocate (phi_rn) - #:call GPU_PARALLEL_LOOP(private='[myalpha,myalpha_rho]') - do i = 1, num_points - j = source_spatials(ai)%coord(1, i) - k = source_spatials(ai)%coord(2, i) - l = source_spatials(ai)%coord(3, i) - - ! Compute speed of sound - myRho = 0._wp - B_tait = 0._wp - small_gamma = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = 1, num_fluids - myalpha_rho(q) = q_cons_vf(q)%sf(j, k, l) - myalpha(q) = q_cons_vf(advxb + q - 1)%sf(j, k, l) - end do - - if (bubbles_euler) then - if (num_fluids > 2) then - $:GPU_LOOP(parallelism='[seq]') - do q = 1, num_fluids - 1 - myRho = myRho + myalpha_rho(q) - B_tait = B_tait + myalpha(q)*pi_infs(q) - small_gamma = small_gamma + myalpha(q)*gammas(q) - end do - else - myRho = myalpha_rho(1) - B_tait = pi_infs(1) - small_gamma = gammas(1) - end if - end if + $:GPU_PARALLEL_LOOP(private='[i,myalpha,myalpha_rho]') + do i = 1, num_points + j = source_spatials(ai)%coord(1, i) + k = source_spatials(ai)%coord(2, i) + l = source_spatials(ai)%coord(3, i) + + ! Compute speed of sound + myRho = 0._wp + B_tait = 0._wp + small_gamma = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = 1, num_fluids + myalpha_rho(q) = q_cons_vf(q)%sf(j, k, l) + myalpha(q) = q_cons_vf(advxb + q - 1)%sf(j, k, l) + end do - if ((.not. bubbles_euler) .or. (mpp_lim .and. (num_fluids > 2))) then + if (bubbles_euler) then + if (num_fluids > 2) then $:GPU_LOOP(parallelism='[seq]') - do q = 1, num_fluids + do q = 1, num_fluids - 1 myRho = myRho + myalpha_rho(q) B_tait = B_tait + myalpha(q)*pi_infs(q) small_gamma = small_gamma + myalpha(q)*gammas(q) end do + else + myRho = myalpha_rho(1) + B_tait = pi_infs(1) + small_gamma = gammas(1) end if + end if - small_gamma = 1._wp/small_gamma + 1._wp - c = sqrt(small_gamma*(q_prim_vf(E_idx)%sf(j, k, l) + ((small_gamma - 1._wp)/small_gamma)*B_tait)/myRho) + if ((.not. bubbles_euler) .or. (mpp_lim .and. (num_fluids > 2))) then + $:GPU_LOOP(parallelism='[seq]') + do q = 1, num_fluids + myRho = myRho + myalpha_rho(q) + B_tait = B_tait + myalpha(q)*pi_infs(q) + small_gamma = small_gamma + myalpha(q)*gammas(q) + end do + end if - ! Wavelength to frequency conversion - if (pulse(ai) == 1 .or. pulse(ai) == 3) frequency_local = f_frequency_local(freq_conv_flag, ai, c) - if (pulse(ai) == 2) gauss_sigma_time_local = f_gauss_sigma_time_local(gauss_conv_flag, ai, c) + small_gamma = 1._wp/small_gamma + 1._wp + c = sqrt(small_gamma*(q_prim_vf(E_idx)%sf(j, k, l) + ((small_gamma - 1._wp)/small_gamma)*B_tait)/myRho) - ! Update momentum source term - call s_source_temporal(sim_time, c, ai, mom_label, frequency_local, gauss_sigma_time_local, source_temporal, sum_BB) - mom_src_diff = source_temporal*source_spatials(ai)%val(i) + ! Wavelength to frequency conversion + if (pulse(ai) == 1 .or. pulse(ai) == 3) frequency_local = f_frequency_local(freq_conv_flag, ai, c) + if (pulse(ai) == 2) gauss_sigma_time_local = f_gauss_sigma_time_local(gauss_conv_flag, ai, c) - if (dipole(ai)) then ! Double amplitude & No momentum source term (only works for Planar) - mass_src(j, k, l) = mass_src(j, k, l) + 2._wp*mom_src_diff/c - if (model_eqns /= 4) E_src(j, k, l) = E_src(j, k, l) + 2._wp*mom_src_diff*c/(small_gamma - 1._wp) - cycle - end if + ! Update momentum source term + call s_source_temporal(sim_time, c, ai, mom_label, frequency_local, gauss_sigma_time_local, source_temporal, sum_BB) + mom_src_diff = source_temporal*source_spatials(ai)%val(i) - if (n == 0) then ! 1D - mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*sign(1._wp, dir(ai)) ! Left or right-going wave - - elseif (p == 0) then ! 2D - if (support(ai) < 5) then ! Planar - mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*cos(dir(ai)) - mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*sin(dir(ai)) - else - mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*cos(source_spatials(ai)%angle(i)) - mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*sin(source_spatials(ai)%angle(i)) - end if - - else ! 3D - if (support(ai) < 5) then ! Planar - mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*cos(dir(ai)) - mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*sin(dir(ai)) - else - mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*source_spatials(ai)%xyz_to_r_ratios(1, i) - mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*source_spatials(ai)%xyz_to_r_ratios(2, i) - mom_src(3, j, k, l) = mom_src(3, j, k, l) + mom_src_diff*source_spatials(ai)%xyz_to_r_ratios(3, i) - end if - end if + if (dipole(ai)) then ! Double amplitude & No momentum source term (only works for Planar) + mass_src(j, k, l) = mass_src(j, k, l) + 2._wp*mom_src_diff/c + if (model_eqns /= 4) E_src(j, k, l) = E_src(j, k, l) + 2._wp*mom_src_diff*c/(small_gamma - 1._wp) + cycle + end if - ! Update mass source term + if (n == 0) then ! 1D + mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*sign(1._wp, dir(ai)) ! Left or right-going wave + + elseif (p == 0) then ! 2D if (support(ai) < 5) then ! Planar - mass_src_diff = mom_src_diff/c - else ! Spherical or cylindrical support - ! Mass source term must be calculated differently using a correction term for spherical and cylindrical support - call s_source_temporal(sim_time, c, ai, mass_label, frequency_local, gauss_sigma_time_local, source_temporal, sum_BB) - mass_src_diff = source_temporal*source_spatials(ai)%val(i) + mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*cos(dir(ai)) + mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*sin(dir(ai)) + else + mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*cos(source_spatials(ai)%angle(i)) + mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*sin(source_spatials(ai)%angle(i)) end if - mass_src(j, k, l) = mass_src(j, k, l) + mass_src_diff - ! Update energy source term - if (model_eqns /= 4) then - E_src(j, k, l) = E_src(j, k, l) + mass_src_diff*c**2._wp/(small_gamma - 1._wp) + else ! 3D + if (support(ai) < 5) then ! Planar + mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*cos(dir(ai)) + mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*sin(dir(ai)) + else + mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*source_spatials(ai)%xyz_to_r_ratios(1, i) + mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*source_spatials(ai)%xyz_to_r_ratios(2, i) + mom_src(3, j, k, l) = mom_src(3, j, k, l) + mom_src_diff*source_spatials(ai)%xyz_to_r_ratios(3, i) end if + end if + + ! Update mass source term + if (support(ai) < 5) then ! Planar + mass_src_diff = mom_src_diff/c + else ! Spherical or cylindrical support + ! Mass source term must be calculated differently using a correction term for spherical and cylindrical support + call s_source_temporal(sim_time, c, ai, mass_label, frequency_local, gauss_sigma_time_local, source_temporal, sum_BB) + mass_src_diff = source_temporal*source_spatials(ai)%val(i) + end if + mass_src(j, k, l) = mass_src(j, k, l) + mass_src_diff + + ! Update energy source term + if (model_eqns /= 4) then + E_src(j, k, l) = E_src(j, k, l) + mass_src_diff*c**2._wp/(small_gamma - 1._wp) + end if - end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end do ! Update the rhs variables - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do q = contxb, contxe - rhs_vf(q)%sf(j, k, l) = rhs_vf(q)%sf(j, k, l) + mass_src(j, k, l) - end do - $:GPU_LOOP(parallelism='[seq]') - do q = momxb, momxe - rhs_vf(q)%sf(j, k, l) = rhs_vf(q)%sf(j, k, l) + mom_src(q - contxe, j, k, l) - end do - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + e_src(j, k, l) + $:GPU_PARALLEL_LOOP(private='[j,k,l]',collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do q = contxb, contxe + rhs_vf(q)%sf(j, k, l) = rhs_vf(q)%sf(j, k, l) + mass_src(j, k, l) + end do + $:GPU_LOOP(parallelism='[seq]') + do q = momxb, momxe + rhs_vf(q)%sf(j, k, l) = rhs_vf(q)%sf(j, k, l) + mom_src(q - contxe, j, k, l) end do + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + e_src(j, k, l) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_acoustic_src_calculations !> This subroutine gives the temporally varying amplitude of the pulse diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index c5ba29c592..fcfafcbe31 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -73,19 +73,19 @@ contains type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf integer :: i, j, k, l !< standard iterators - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhoM(j, k, l) = 0._wp - do i = 1, num_fluids - rhoM(j, k, l) = rhoM(j, k, l) + & - q_cons_vf(contxb + i - 1)%sf(j, k, l) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + rhoM(j, k, l) = 0._wp + do i = 1, num_fluids + rhoM(j, k, l) = rhoM(j, k, l) + & + q_cons_vf(contxb + i - 1)%sf(j, k, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_compute_mixture_density @@ -104,64 +104,64 @@ contains call s_compute_acceleration(mytime) call s_compute_mixture_density(q_cons_vf) - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = momxb, E_idx - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(i)%sf(j, k, l) = 0._wp - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = momxb, E_idx + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(i)%sf(j, k, l) = 0._wp end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (bf_x) then ! x-direction body forces - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - rhoM(j, k, l)*accel_bf(1) - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - q_cons_vf(momxb)%sf(j, k, l)*accel_bf(1) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + rhoM(j, k, l)*accel_bf(1) + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + q_cons_vf(momxb)%sf(j, k, l)*accel_bf(1) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (bf_y) then ! y-direction body forces - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - rhoM(j, k, l)*accel_bf(2) - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - q_cons_vf(momxb + 1)%sf(j, k, l)*accel_bf(2) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + rhoM(j, k, l)*accel_bf(2) + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + q_cons_vf(momxb + 1)%sf(j, k, l)*accel_bf(2) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (bf_z) then ! z-direction body forces - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(momxe)%sf(j, k, l) = rhs_vf(momxe)%sf(j, k, l) + & - (rhoM(j, k, l))*accel_bf(3) - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - q_cons_vf(momxe)%sf(j, k, l)*accel_bf(3) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(momxe)%sf(j, k, l) = rhs_vf(momxe)%sf(j, k, l) + & + (rhoM(j, k, l))*accel_bf(3) + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + q_cons_vf(momxe)%sf(j, k, l)*accel_bf(3) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index f198d2e78c..0895a98918 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -76,20 +76,20 @@ contains real(wp) :: nR3bar integer(wp) :: i, j, k, l - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - nR3bar = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, nb - nR3bar = nR3bar + weight(i)*(q_cons_vf(rs(i))%sf(j, k, l))**3._wp - end do - q_cons_vf(alf_idx)%sf(j, k, l) = (4._wp*pi*nR3bar)/(3._wp*q_cons_vf(n_idx)%sf(j, k, l)**2._wp) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + nR3bar = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nb + nR3bar = nR3bar + weight(i)*(q_cons_vf(rs(i))%sf(j, k, l))**3._wp end do + q_cons_vf(alf_idx)%sf(j, k, l) = (4._wp*pi*nR3bar)/(3._wp*q_cons_vf(n_idx)%sf(j, k, l)**2._wp) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_comp_alpha_from_n @@ -104,50 +104,50 @@ contains if (idir == 1) then if (.not. qbmm) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - divu_in%sf(j, k, l) = 0._wp - divu_in%sf(j, k, l) = & - 5.e-1_wp/dx(j)*(q_prim_vf(contxe + idir)%sf(j + 1, k, l) - & - q_prim_vf(contxe + idir)%sf(j - 1, k, l)) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + divu_in%sf(j, k, l) = 0._wp + divu_in%sf(j, k, l) = & + 5.e-1_wp/dx(j)*(q_prim_vf(contxe + idir)%sf(j + 1, k, l) - & + q_prim_vf(contxe + idir)%sf(j - 1, k, l)) - end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if elseif (idir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - divu_in%sf(j, k, l) = divu_in%sf(j, k, l) + & - 5.e-1_wp/dy(k)*(q_prim_vf(contxe + idir)%sf(j, k + 1, l) - & - q_prim_vf(contxe + idir)%sf(j, k - 1, l)) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + divu_in%sf(j, k, l) = divu_in%sf(j, k, l) + & + 5.e-1_wp/dy(k)*(q_prim_vf(contxe + idir)%sf(j, k + 1, l) - & + q_prim_vf(contxe + idir)%sf(j, k - 1, l)) - end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() elseif (idir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - divu_in%sf(j, k, l) = divu_in%sf(j, k, l) + & - 5.e-1_wp/dz(l)*(q_prim_vf(contxe + idir)%sf(j, k, l + 1) - & - q_prim_vf(contxe + idir)%sf(j, k, l - 1)) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + divu_in%sf(j, k, l) = divu_in%sf(j, k, l) + & + 5.e-1_wp/dz(l)*(q_prim_vf(contxe + idir)%sf(j, k, l + 1) - & + q_prim_vf(contxe + idir)%sf(j, k, l - 1)) - end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if @@ -177,180 +177,180 @@ contains integer :: dmBub_id !< Dummy variables for unified subgrid bubble subroutines real(wp) :: dmMass_v, dmMass_n, dmBeta_c, dmBeta_t, dmCson - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - bub_adv_src(j, k, l) = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb - bub_r_src(j, k, l, q) = 0._wp - bub_v_src(j, k, l, q) = 0._wp - bub_p_src(j, k, l, q) = 0._wp - bub_m_src(j, k, l, q) = 0._wp - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + bub_adv_src(j, k, l) = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = 1, nb + bub_r_src(j, k, l, q) = 0._wp + bub_v_src(j, k, l, q) = 0._wp + bub_p_src(j, k, l, q) = 0._wp + bub_m_src(j, k, l, q) = 0._wp end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() adap_dt_stop_max = 0 - #:call GPU_PARALLEL_LOOP(collapse=3, private='[Rtmp, Vtmp, myalpha_rho, myalpha]', & + $:GPU_PARALLEL_LOOP(private='[j,k,l,q,Rtmp, Vtmp, myalpha_rho, myalpha]', collapse=3, & & reduction='[[adap_dt_stop_max]]', reductionOp='[MAX]', & & copy='[adap_dt_stop_max]') - do l = 0, p - do k = 0, n - do j = 0, m + do l = 0, p + do k = 0, n + do j = 0, m - if (adv_n) then - nbub = q_prim_vf(n_idx)%sf(j, k, l) - else - $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb - Rtmp(q) = q_prim_vf(rs(q))%sf(j, k, l) - Vtmp(q) = q_prim_vf(vs(q))%sf(j, k, l) - end do + if (adv_n) then + nbub = q_prim_vf(n_idx)%sf(j, k, l) + else + $:GPU_LOOP(parallelism='[seq]') + do q = 1, nb + Rtmp(q) = q_prim_vf(rs(q))%sf(j, k, l) + Vtmp(q) = q_prim_vf(vs(q))%sf(j, k, l) + end do - R3 = 0._wp + R3 = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb - R3 = R3 + weight(q)*Rtmp(q)**3._wp - end do + $:GPU_LOOP(parallelism='[seq]') + do q = 1, nb + R3 = R3 + weight(q)*Rtmp(q)**3._wp + end do - nbub = (3._wp/(4._wp*pi))*q_prim_vf(alf_idx)%sf(j, k, l)/R3 - end if + nbub = (3._wp/(4._wp*pi))*q_prim_vf(alf_idx)%sf(j, k, l)/R3 + end if - if (.not. adap_dt) then - R2Vav = 0._wp + if (.not. adap_dt) then + R2Vav = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb - R2Vav = R2Vav + weight(q)*Rtmp(q)**2._wp*Vtmp(q) - end do + $:GPU_LOOP(parallelism='[seq]') + do q = 1, nb + R2Vav = R2Vav + weight(q)*Rtmp(q)**2._wp*Vtmp(q) + end do - bub_adv_src(j, k, l) = 4._wp*pi*nbub*R2Vav - end if + bub_adv_src(j, k, l) = 4._wp*pi*nbub*R2Vav + end if + + $:GPU_LOOP(parallelism='[seq]') + do q = 1, nb $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb + do ii = 1, num_fluids + myalpha_rho(ii) = q_cons_vf(ii)%sf(j, k, l) + myalpha(ii) = q_cons_vf(advxb + ii - 1)%sf(j, k, l) + end do + myRho = 0._wp + n_tait = 0._wp + B_tait = 0._wp + + if (mpp_lim .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') do ii = 1, num_fluids - myalpha_rho(ii) = q_cons_vf(ii)%sf(j, k, l) - myalpha(ii) = q_cons_vf(advxb + ii - 1)%sf(j, k, l) + myRho = myRho + myalpha_rho(ii) + n_tait = n_tait + myalpha(ii)*gammas(ii) + B_tait = B_tait + myalpha(ii)*pi_infs(ii) + end do + else if (num_fluids > 2) then + $:GPU_LOOP(parallelism='[seq]') + do ii = 1, num_fluids - 1 + myRho = myRho + myalpha_rho(ii) + n_tait = n_tait + myalpha(ii)*gammas(ii) + B_tait = B_tait + myalpha(ii)*pi_infs(ii) end do + else + myRho = myalpha_rho(1) + n_tait = gammas(1) + B_tait = pi_infs(1)/pi_fac + end if - myRho = 0._wp - n_tait = 0._wp - B_tait = 0._wp - - if (mpp_lim .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do ii = 1, num_fluids - myRho = myRho + myalpha_rho(ii) - n_tait = n_tait + myalpha(ii)*gammas(ii) - B_tait = B_tait + myalpha(ii)*pi_infs(ii) - end do - else if (num_fluids > 2) then - $:GPU_LOOP(parallelism='[seq]') - do ii = 1, num_fluids - 1 - myRho = myRho + myalpha_rho(ii) - n_tait = n_tait + myalpha(ii)*gammas(ii) - B_tait = B_tait + myalpha(ii)*pi_infs(ii) - end do - else - myRho = myalpha_rho(1) - n_tait = gammas(1) - B_tait = pi_infs(1)/pi_fac - end if + n_tait = 1._wp/n_tait + 1._wp !make this the usual little 'gamma' + B_tait = B_tait*(n_tait - 1)/n_tait ! make this the usual pi_inf - n_tait = 1._wp/n_tait + 1._wp !make this the usual little 'gamma' - B_tait = B_tait*(n_tait - 1)/n_tait ! make this the usual pi_inf + myRho = q_prim_vf(1)%sf(j, k, l) + myP = q_prim_vf(E_idx)%sf(j, k, l) + alf = q_prim_vf(alf_idx)%sf(j, k, l) + myR = q_prim_vf(rs(q))%sf(j, k, l) + myV = q_prim_vf(vs(q))%sf(j, k, l) - myRho = q_prim_vf(1)%sf(j, k, l) - myP = q_prim_vf(E_idx)%sf(j, k, l) - alf = q_prim_vf(alf_idx)%sf(j, k, l) - myR = q_prim_vf(rs(q))%sf(j, k, l) - myV = q_prim_vf(vs(q))%sf(j, k, l) + if (.not. polytropic) then + pb_local = q_prim_vf(ps(q))%sf(j, k, l) + mv_local = q_prim_vf(ms(q))%sf(j, k, l) + call s_bwproperty(pb_local, q, chi_vw, k_mw, rho_mw) + call s_vflux(myR, myV, pb_local, mv_local, q, vflux) + pbdot = f_bpres_dot(vflux, myR, myV, pb_local, mv_local, q) - if (.not. polytropic) then - pb_local = q_prim_vf(ps(q))%sf(j, k, l) - mv_local = q_prim_vf(ms(q))%sf(j, k, l) - call s_bwproperty(pb_local, q, chi_vw, k_mw, rho_mw) - call s_vflux(myR, myV, pb_local, mv_local, q, vflux) - pbdot = f_bpres_dot(vflux, myR, myV, pb_local, mv_local, q) - - bub_p_src(j, k, l, q) = nbub*pbdot - bub_m_src(j, k, l, q) = nbub*vflux*4._wp*pi*(myR**2._wp) - else - pb_local = 0._wp; mv_local = 0._wp; vflux = 0._wp; pbdot = 0._wp - end if - - ! Adaptive time stepping - adap_dt_stop = 0 - - if (adap_dt) then + bub_p_src(j, k, l, q) = nbub*pbdot + bub_m_src(j, k, l, q) = nbub*vflux*4._wp*pi*(myR**2._wp) + else + pb_local = 0._wp; mv_local = 0._wp; vflux = 0._wp; pbdot = 0._wp + end if - call s_advance_step(myRho, myP, myR, myV, R0(q), & - pb_local, pbdot, alf, n_tait, B_tait, & - bub_adv_src(j, k, l), divu_in%sf(j, k, l), & - dmBub_id, dmMass_v, dmMass_n, dmBeta_c, & - dmBeta_t, dmCson, adap_dt_stop) + ! Adaptive time stepping + adap_dt_stop = 0 - q_cons_vf(rs(q))%sf(j, k, l) = nbub*myR - q_cons_vf(vs(q))%sf(j, k, l) = nbub*myV + if (adap_dt) then - else - rddot = f_rddot(myRho, myP, myR, myV, R0(q), & + call s_advance_step(myRho, myP, myR, myV, R0(q), & pb_local, pbdot, alf, n_tait, B_tait, & bub_adv_src(j, k, l), divu_in%sf(j, k, l), & - dmCson) - bub_v_src(j, k, l, q) = nbub*rddot - bub_r_src(j, k, l, q) = q_cons_vf(vs(q))%sf(j, k, l) - end if + dmBub_id, dmMass_v, dmMass_n, dmBeta_c, & + dmBeta_t, dmCson, adap_dt_stop) - adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) + q_cons_vf(rs(q))%sf(j, k, l) = nbub*myR + q_cons_vf(vs(q))%sf(j, k, l) = nbub*myV - if (alf < 1.e-11_wp) then - bub_adv_src(j, k, l) = 0._wp - bub_r_src(j, k, l, q) = 0._wp - bub_v_src(j, k, l, q) = 0._wp - if (.not. polytropic) then - bub_p_src(j, k, l, q) = 0._wp - bub_m_src(j, k, l, q) = 0._wp - end if + else + rddot = f_rddot(myRho, myP, myR, myV, R0(q), & + pb_local, pbdot, alf, n_tait, B_tait, & + bub_adv_src(j, k, l), divu_in%sf(j, k, l), & + dmCson) + bub_v_src(j, k, l, q) = nbub*rddot + bub_r_src(j, k, l, q) = q_cons_vf(vs(q))%sf(j, k, l) + end if + + adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) + + if (alf < 1.e-11_wp) then + bub_adv_src(j, k, l) = 0._wp + bub_r_src(j, k, l, q) = 0._wp + bub_v_src(j, k, l, q) = 0._wp + if (.not. polytropic) then + bub_p_src(j, k, l, q) = 0._wp + bub_m_src(j, k, l, q) = 0._wp end if - end do + end if end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") if (.not. adap_dt) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do q = 0, n - do i = 0, m - rhs_vf(alf_idx)%sf(i, q, l) = rhs_vf(alf_idx)%sf(i, q, l) + bub_adv_src(i, q, l) - if (num_fluids > 1) rhs_vf(advxb)%sf(i, q, l) = & - rhs_vf(advxb)%sf(i, q, l) - bub_adv_src(i, q, l) - $:GPU_LOOP(parallelism='[seq]') - do k = 1, nb - rhs_vf(rs(k))%sf(i, q, l) = rhs_vf(rs(k))%sf(i, q, l) + bub_r_src(i, q, l, k) - rhs_vf(vs(k))%sf(i, q, l) = rhs_vf(vs(k))%sf(i, q, l) + bub_v_src(i, q, l, k) - if (polytropic .neqv. .true.) then - rhs_vf(ps(k))%sf(i, q, l) = rhs_vf(ps(k))%sf(i, q, l) + bub_p_src(i, q, l, k) - rhs_vf(ms(k))%sf(i, q, l) = rhs_vf(ms(k))%sf(i, q, l) + bub_m_src(i, q, l, k) - end if - end do + $:GPU_PARALLEL_LOOP(private='[i,k,l,q]', collapse=3) + do l = 0, p + do q = 0, n + do i = 0, m + rhs_vf(alf_idx)%sf(i, q, l) = rhs_vf(alf_idx)%sf(i, q, l) + bub_adv_src(i, q, l) + if (num_fluids > 1) rhs_vf(advxb)%sf(i, q, l) = & + rhs_vf(advxb)%sf(i, q, l) - bub_adv_src(i, q, l) + $:GPU_LOOP(parallelism='[seq]') + do k = 1, nb + rhs_vf(rs(k))%sf(i, q, l) = rhs_vf(rs(k))%sf(i, q, l) + bub_r_src(i, q, l, k) + rhs_vf(vs(k))%sf(i, q, l) = rhs_vf(vs(k))%sf(i, q, l) + bub_v_src(i, q, l, k) + if (polytropic .neqv. .true.) then + rhs_vf(ps(k))%sf(i, q, l) = rhs_vf(ps(k))%sf(i, q, l) + bub_p_src(i, q, l, k) + rhs_vf(ms(k))%sf(i, q, l) = rhs_vf(ms(k))%sf(i, q, l) + bub_m_src(i, q, l, k) + end if end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_compute_bubble_EE_source diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index c205b35f3f..210c43f7e2 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -592,107 +592,107 @@ contains ! Subgrid p_inf model based on Maeda and Colonius (2018). if (lag_params%pressure_corrector) then ! Calculate velocity potentials (valid for one bubble per cell) - #:call GPU_PARALLEL_LOOP(private='[k,cell]') - do k = 1, nBubs - call s_get_pinf(k, q_prim_vf, 2, paux, cell, preterm1, term2, Romega) - myR0 = bub_R0(k) - myR = intfc_rad(k, 2) - myV = intfc_vel(k, 2) - myPb = gas_p(k, 2) - pint = f_cpbw_KM(myR0, myR, myV, myPb) - pint = pint + 0.5_wp*myV**2._wp - if (lag_params%cluster_type == 2) then - bub_dphidt(k) = (paux - pint) + term2 - ! Accounting for the potential induced by the bubble averaged over the control volume - ! Note that this is based on the incompressible flow assumption near the bubble. - term1_fac = 3._wp/2._wp*(myR*(Romega**2._wp - myR**2._wp))/(Romega**3._wp - myR**3._wp) - bub_dphidt(k) = bub_dphidt(k)/(1._wp - term1_fac) - end if - end do - #:endcall GPU_PARALLEL_LOOP + $:GPU_PARALLEL_LOOP(private='[k,cell]') + do k = 1, nBubs + call s_get_pinf(k, q_prim_vf, 2, paux, cell, preterm1, term2, Romega) + myR0 = bub_R0(k) + myR = intfc_rad(k, 2) + myV = intfc_vel(k, 2) + myPb = gas_p(k, 2) + pint = f_cpbw_KM(myR0, myR, myV, myPb) + pint = pint + 0.5_wp*myV**2._wp + if (lag_params%cluster_type == 2) then + bub_dphidt(k) = (paux - pint) + term2 + ! Accounting for the potential induced by the bubble averaged over the control volume + ! Note that this is based on the incompressible flow assumption near the bubble. + term1_fac = 3._wp/2._wp*(myR*(Romega**2._wp - myR**2._wp))/(Romega**3._wp - myR**3._wp) + bub_dphidt(k) = bub_dphidt(k)/(1._wp - term1_fac) + end if + end do + $:END_GPU_PARALLEL_LOOP() end if ! Radial motion model adap_dt_stop_max = 0 - #:call GPU_PARALLEL_LOOP(private='[k,myalpha_rho,myalpha,Re,cell]', & + $:GPU_PARALLEL_LOOP(private='[k,i,myalpha_rho,myalpha,Re,cell]', & & reduction='[[adap_dt_stop_max]]',reductionOp='[MAX]', & & copy='[adap_dt_stop_max]',copyin='[stage]') - do k = 1, nBubs - ! Keller-Miksis model - - ! Current bubble state - myPb = gas_p(k, 2) - myMass_n = gas_mg(k) - myMass_v = gas_mv(k, 2) - myR = intfc_rad(k, 2) - myV = intfc_vel(k, 2) - myBeta_c = gas_betaC(k) - myBeta_t = gas_betaT(k) - myR0 = bub_R0(k) - - ! Vapor and heat fluxes - call s_vflux(myR, myV, myPb, myMass_v, k, myVapFlux, myMass_n, myBeta_c, myR_m, mygamma_m) - myPbdot = f_bpres_dot(myVapFlux, myR, myV, myPb, myMass_v, k, myBeta_t, myR_m, mygamma_m) - myMvdot = 4._wp*pi*myR**2._wp*myVapFlux - - ! Obtaining driving pressure - call s_get_pinf(k, q_prim_vf, 1, myPinf, cell, aux1, aux2) - - ! Obtain liquid density and computing speed of sound from pinf - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - myalpha_rho(i) = q_prim_vf(i)%sf(cell(1), cell(2), cell(3)) - myalpha(i) = q_prim_vf(E_idx + i)%sf(cell(1), cell(2), cell(3)) - end do - call s_convert_species_to_mixture_variables_acc(myRho, gamma, pi_inf, qv, myalpha, & - myalpha_rho, Re) - call s_compute_cson_from_pinf(q_prim_vf, myPinf, cell, myRho, gamma, pi_inf, myCson) + do k = 1, nBubs + ! Keller-Miksis model + + ! Current bubble state + myPb = gas_p(k, 2) + myMass_n = gas_mg(k) + myMass_v = gas_mv(k, 2) + myR = intfc_rad(k, 2) + myV = intfc_vel(k, 2) + myBeta_c = gas_betaC(k) + myBeta_t = gas_betaT(k) + myR0 = bub_R0(k) + + ! Vapor and heat fluxes + call s_vflux(myR, myV, myPb, myMass_v, k, myVapFlux, myMass_n, myBeta_c, myR_m, mygamma_m) + myPbdot = f_bpres_dot(myVapFlux, myR, myV, myPb, myMass_v, k, myBeta_t, myR_m, mygamma_m) + myMvdot = 4._wp*pi*myR**2._wp*myVapFlux + + ! Obtaining driving pressure + call s_get_pinf(k, q_prim_vf, 1, myPinf, cell, aux1, aux2) + + ! Obtain liquid density and computing speed of sound from pinf + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + myalpha_rho(i) = q_prim_vf(i)%sf(cell(1), cell(2), cell(3)) + myalpha(i) = q_prim_vf(E_idx + i)%sf(cell(1), cell(2), cell(3)) + end do + call s_convert_species_to_mixture_variables_acc(myRho, gamma, pi_inf, qv, myalpha, & + myalpha_rho, Re) + call s_compute_cson_from_pinf(q_prim_vf, myPinf, cell, myRho, gamma, pi_inf, myCson) - ! Adaptive time stepping - adap_dt_stop = 0 + ! Adaptive time stepping + adap_dt_stop = 0 - if (adap_dt) then + if (adap_dt) then - call s_advance_step(myRho, myPinf, myR, myV, myR0, myPb, myPbdot, dmalf, & - dmntait, dmBtait, dm_bub_adv_src, dm_divu, & - k, myMass_v, myMass_n, myBeta_c, & - myBeta_t, myCson, adap_dt_stop) + call s_advance_step(myRho, myPinf, myR, myV, myR0, myPb, myPbdot, dmalf, & + dmntait, dmBtait, dm_bub_adv_src, dm_divu, & + k, myMass_v, myMass_n, myBeta_c, & + myBeta_t, myCson, adap_dt_stop) - ! Update bubble state - intfc_rad(k, 1) = myR - intfc_vel(k, 1) = myV - gas_p(k, 1) = myPb - gas_mv(k, 1) = myMass_v + ! Update bubble state + intfc_rad(k, 1) = myR + intfc_vel(k, 1) = myV + gas_p(k, 1) = myPb + gas_mv(k, 1) = myMass_v - else + else - ! Radial acceleration from bubble models - intfc_dveldt(k, stage) = f_rddot(myRho, myPinf, myR, myV, myR0, & - myPb, myPbdot, dmalf, dmntait, dmBtait, & - dm_bub_adv_src, dm_divu, & - myCson) - intfc_draddt(k, stage) = myV - gas_dmvdt(k, stage) = myMvdot - gas_dpdt(k, stage) = myPbdot + ! Radial acceleration from bubble models + intfc_dveldt(k, stage) = f_rddot(myRho, myPinf, myR, myV, myR0, & + myPb, myPbdot, dmalf, dmntait, dmBtait, & + dm_bub_adv_src, dm_divu, & + myCson) + intfc_draddt(k, stage) = myV + gas_dmvdt(k, stage) = myMvdot + gas_dpdt(k, stage) = myPbdot - end if + end if - adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) + adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) - end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") ! Bubbles remain in a fixed position - #:call GPU_PARALLEL_LOOP(collapse=2, private='[k]', copyin='[stage]') - do k = 1, nBubs - do l = 1, 3 - mtn_dposdt(k, l, stage) = 0._wp - mtn_dveldt(k, l, stage) = 0._wp - end do + $:GPU_PARALLEL_LOOP(collapse=2, private='[k,l]', copyin='[stage]') + do k = 1, nBubs + do l = 1, 3 + mtn_dposdt(k, l, stage) = 0._wp + mtn_dveldt(k, l, stage) = 0._wp end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() call nvtxEndRange @@ -717,38 +717,38 @@ contains ! (q / (1 - beta)) * d(beta)/dt source if (p == 0) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do k = 0, p - do j = 0, n - do i = 0, m - do l = 1, E_idx - if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & - q_cons_vf(l)%sf(i, j, k)*(q_beta%vf(2)%sf(i, j, k) + & - q_beta%vf(5)%sf(i, j, k)) - - end if - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do k = 0, p + do j = 0, n + do i = 0, m + do l = 1, E_idx + if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & + q_cons_vf(l)%sf(i, j, k)*(q_beta%vf(2)%sf(i, j, k) + & + q_beta%vf(5)%sf(i, j, k)) + + end if end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() else - #:call GPU_PARALLEL_LOOP(collapse=4) - do k = 0, p - do j = 0, n - do i = 0, m - do l = 1, E_idx - if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & - q_cons_vf(l)%sf(i, j, k)/q_beta%vf(1)%sf(i, j, k)* & - q_beta%vf(2)%sf(i, j, k) - end if - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do k = 0, p + do j = 0, n + do i = 0, m + do l = 1, E_idx + if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & + q_cons_vf(l)%sf(i, j, k)/q_beta%vf(1)%sf(i, j, k)* & + q_beta%vf(2)%sf(i, j, k) + end if end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if do l = 1, num_dims @@ -756,48 +756,48 @@ contains call s_gradient_dir(q_prim_vf(E_idx), q_beta%vf(3), l) ! (q / (1 - beta)) * d(beta)/dt source - #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(contxe + l)%sf(i, j, k) = rhs_vf(contxe + l)%sf(i, j, k) - & - (1._wp - q_beta%vf(1)%sf(i, j, k))/ & - q_beta%vf(1)%sf(i, j, k)* & - q_beta%vf(3)%sf(i, j, k) - end if - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) + do k = 0, p + do j = 0, n + do i = 0, m + if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(contxe + l)%sf(i, j, k) = rhs_vf(contxe + l)%sf(i, j, k) - & + (1._wp - q_beta%vf(1)%sf(i, j, k))/ & + q_beta%vf(1)%sf(i, j, k)* & + q_beta%vf(3)%sf(i, j, k) + end if end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() !source in energy - #:call GPU_PARALLEL_LOOP(collapse=3) - do k = idwbuff(3)%beg, idwbuff(3)%end - do j = idwbuff(2)%beg, idwbuff(2)%end - do i = idwbuff(1)%beg, idwbuff(1)%end - q_beta%vf(3)%sf(i, j, k) = q_prim_vf(E_idx)%sf(i, j, k)*q_prim_vf(contxe + l)%sf(i, j, k) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) + do k = idwbuff(3)%beg, idwbuff(3)%end + do j = idwbuff(2)%beg, idwbuff(2)%end + do i = idwbuff(1)%beg, idwbuff(1)%end + q_beta%vf(3)%sf(i, j, k) = q_prim_vf(E_idx)%sf(i, j, k)*q_prim_vf(contxe + l)%sf(i, j, k) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() call s_gradient_dir(q_beta%vf(3), q_beta%vf(4), l) ! (beta / (1 - beta)) * d(Pu)/dl source - #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(E_idx)%sf(i, j, k) = rhs_vf(E_idx)%sf(i, j, k) - & - q_beta%vf(4)%sf(i, j, k)*(1._wp - q_beta%vf(1)%sf(i, j, k))/ & - q_beta%vf(1)%sf(i, j, k) - end if - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) + do k = 0, p + do j = 0, n + do i = 0, m + if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(E_idx)%sf(i, j, k) = rhs_vf(E_idx)%sf(i, j, k) - & + q_beta%vf(4)%sf(i, j, k)*(1._wp - q_beta%vf(1)%sf(i, j, k))/ & + q_beta%vf(1)%sf(i, j, k) + end if end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end do end if @@ -843,34 +843,34 @@ contains call nvtxStartRange("BUBBLES-LAGRANGE-KERNELS") - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, q_beta_idx - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - q_beta%vf(i)%sf(j, k, l) = 0._wp - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, q_beta_idx + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + q_beta%vf(i)%sf(j, k, l) = 0._wp end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() call s_smoothfunction(nBubs, intfc_rad, intfc_vel, & mtn_s, mtn_pos, q_beta) !Store 1-beta - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - q_beta%vf(1)%sf(j, k, l) = 1._wp - q_beta%vf(1)%sf(j, k, l) - ! Limiting void fraction given max value - q_beta%vf(1)%sf(j, k, l) = max(q_beta%vf(1)%sf(j, k, l), & - 1._wp - lag_params%valmaxvoid) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + q_beta%vf(1)%sf(j, k, l) = 1._wp - q_beta%vf(1)%sf(j, k, l) + ! Limiting void fraction given max value + q_beta%vf(1)%sf(j, k, l) = max(q_beta%vf(1)%sf(j, k, l), & + 1._wp - lag_params%valmaxvoid) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() call nvtxEndRange @@ -1101,17 +1101,17 @@ contains integer :: k if (time_stepper == 1) then ! 1st order TVD RK - #:call GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs - !u{1} = u{n} + dt * RHS{n} - intfc_rad(k, 1) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) - intfc_vel(k, 1) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) - mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) - mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) - gas_p(k, 1) = gas_p(k, 1) + dt*gas_dpdt(k, 1) - gas_mv(k, 1) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) - end do - #:endcall GPU_PARALLEL_LOOP + $:GPU_PARALLEL_LOOP(private='[k]') + do k = 1, nBubs + !u{1} = u{n} + dt * RHS{n} + intfc_rad(k, 1) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) + intfc_vel(k, 1) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) + mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) + mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) + gas_p(k, 1) = gas_p(k, 1) + dt*gas_dpdt(k, 1) + gas_mv(k, 1) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) + end do + $:END_GPU_PARALLEL_LOOP() call s_transfer_data_to_tmp() call s_write_void_evol(mytime) @@ -1124,30 +1124,30 @@ contains elseif (time_stepper == 2) then ! 2nd order TVD RK if (stage == 1) then - #:call GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs - !u{1} = u{n} + dt * RHS{n} - intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) - intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) - mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) - mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) - gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) - gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) - end do - #:endcall GPU_PARALLEL_LOOP + $:GPU_PARALLEL_LOOP(private='[k]') + do k = 1, nBubs + !u{1} = u{n} + dt * RHS{n} + intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) + intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) + mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) + mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) + gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) + gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) + end do + $:END_GPU_PARALLEL_LOOP() elseif (stage == 2) then - #:call GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs - !u{1} = u{n} + (1/2) * dt * (RHS{n} + RHS{1}) - intfc_rad(k, 1) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/2._wp - intfc_vel(k, 1) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/2._wp - mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/2._wp - mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/2._wp - gas_p(k, 1) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/2._wp - gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp - end do - #:endcall GPU_PARALLEL_LOOP + $:GPU_PARALLEL_LOOP(private='[k]') + do k = 1, nBubs + !u{1} = u{n} + (1/2) * dt * (RHS{n} + RHS{1}) + intfc_rad(k, 1) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/2._wp + intfc_vel(k, 1) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/2._wp + mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/2._wp + mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/2._wp + gas_p(k, 1) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/2._wp + gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp + end do + $:END_GPU_PARALLEL_LOOP() call s_transfer_data_to_tmp() call s_write_void_evol(mytime) @@ -1162,42 +1162,42 @@ contains elseif (time_stepper == 3) then ! 3rd order TVD RK if (stage == 1) then - #:call GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs - !u{1} = u{n} + dt * RHS{n} - intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) - intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) - mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) - mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) - gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) - gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) - end do - #:endcall GPU_PARALLEL_LOOP + $:GPU_PARALLEL_LOOP(private='[k]') + do k = 1, nBubs + !u{1} = u{n} + dt * RHS{n} + intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) + intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) + mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) + mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) + gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) + gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) + end do + $:END_GPU_PARALLEL_LOOP() elseif (stage == 2) then - #:call GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs - !u{2} = u{n} + (1/4) * dt * [RHS{n} + RHS{1}] - intfc_rad(k, 2) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/4._wp - intfc_vel(k, 2) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/4._wp - mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/4._wp - mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/4._wp - gas_p(k, 2) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/4._wp - gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp - end do - #:endcall GPU_PARALLEL_LOOP + $:GPU_PARALLEL_LOOP(private='[k]') + do k = 1, nBubs + !u{2} = u{n} + (1/4) * dt * [RHS{n} + RHS{1}] + intfc_rad(k, 2) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/4._wp + intfc_vel(k, 2) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/4._wp + mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/4._wp + mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/4._wp + gas_p(k, 2) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/4._wp + gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp + end do + $:END_GPU_PARALLEL_LOOP() elseif (stage == 3) then - #:call GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs - !u{n+1} = u{n} + (2/3) * dt * [(1/4)* RHS{n} + (1/4)* RHS{1} + RHS{2}] - intfc_rad(k, 1) = intfc_rad(k, 1) + (2._wp/3._wp)*dt*(intfc_draddt(k, 1)/4._wp + intfc_draddt(k, 2)/4._wp + intfc_draddt(k, 3)) - intfc_vel(k, 1) = intfc_vel(k, 1) + (2._wp/3._wp)*dt*(intfc_dveldt(k, 1)/4._wp + intfc_dveldt(k, 2)/4._wp + intfc_dveldt(k, 3)) - mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dposdt(k, 1:3, 1)/4._wp + mtn_dposdt(k, 1:3, 2)/4._wp + mtn_dposdt(k, 1:3, 3)) - mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dveldt(k, 1:3, 1)/4._wp + mtn_dveldt(k, 1:3, 2)/4._wp + mtn_dveldt(k, 1:3, 3)) - gas_p(k, 1) = gas_p(k, 1) + (2._wp/3._wp)*dt*(gas_dpdt(k, 1)/4._wp + gas_dpdt(k, 2)/4._wp + gas_dpdt(k, 3)) - gas_mv(k, 1) = gas_mv(k, 1) + (2._wp/3._wp)*dt*(gas_dmvdt(k, 1)/4._wp + gas_dmvdt(k, 2)/4._wp + gas_dmvdt(k, 3)) - end do - #:endcall GPU_PARALLEL_LOOP + $:GPU_PARALLEL_LOOP(private='[k]') + do k = 1, nBubs + !u{n+1} = u{n} + (2/3) * dt * [(1/4)* RHS{n} + (1/4)* RHS{1} + RHS{2}] + intfc_rad(k, 1) = intfc_rad(k, 1) + (2._wp/3._wp)*dt*(intfc_draddt(k, 1)/4._wp + intfc_draddt(k, 2)/4._wp + intfc_draddt(k, 3)) + intfc_vel(k, 1) = intfc_vel(k, 1) + (2._wp/3._wp)*dt*(intfc_dveldt(k, 1)/4._wp + intfc_dveldt(k, 2)/4._wp + intfc_dveldt(k, 3)) + mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dposdt(k, 1:3, 1)/4._wp + mtn_dposdt(k, 1:3, 2)/4._wp + mtn_dposdt(k, 1:3, 3)) + mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dveldt(k, 1:3, 1)/4._wp + mtn_dveldt(k, 1:3, 2)/4._wp + mtn_dveldt(k, 1:3, 3)) + gas_p(k, 1) = gas_p(k, 1) + (2._wp/3._wp)*dt*(gas_dpdt(k, 1)/4._wp + gas_dpdt(k, 2)/4._wp + gas_dpdt(k, 3)) + gas_mv(k, 1) = gas_mv(k, 1) + (2._wp/3._wp)*dt*(gas_dmvdt(k, 1)/4._wp + gas_dmvdt(k, 2)/4._wp + gas_dmvdt(k, 3)) + end do + $:END_GPU_PARALLEL_LOOP() call s_transfer_data_to_tmp() call s_write_void_evol(mytime) @@ -1274,18 +1274,18 @@ contains integer :: k - #:call GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs - gas_p(k, 2) = gas_p(k, 1) - gas_mv(k, 2) = gas_mv(k, 1) - intfc_rad(k, 2) = intfc_rad(k, 1) - intfc_vel(k, 2) = intfc_vel(k, 1) - mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) - mtn_posPrev(k, 1:3, 2) = mtn_posPrev(k, 1:3, 1) - mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) - mtn_s(k, 1:3, 2) = mtn_s(k, 1:3, 1) - end do - #:endcall GPU_PARALLEL_LOOP + $:GPU_PARALLEL_LOOP(private='[k]') + do k = 1, nBubs + gas_p(k, 2) = gas_p(k, 1) + gas_mv(k, 2) = gas_mv(k, 1) + intfc_rad(k, 2) = intfc_rad(k, 1) + intfc_vel(k, 2) = intfc_vel(k, 1) + mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + mtn_posPrev(k, 1:3, 2) = mtn_posPrev(k, 1:3, 1) + mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + mtn_s(k, 1:3, 2) = mtn_s(k, 1:3, 1) + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_transfer_data_to_tmp @@ -1373,49 +1373,49 @@ contains if (dir == 1) then ! Gradient in x dir. - #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - dq%sf(i, j, k) = q%sf(i, j, k)*(dx(i + 1) - dx(i - 1)) & - + q%sf(i + 1, j, k)*(dx(i) + dx(i - 1)) & - - q%sf(i - 1, j, k)*(dx(i) + dx(i + 1)) - dq%sf(i, j, k) = dq%sf(i, j, k)/ & - ((dx(i) + dx(i - 1))*(dx(i) + dx(i + 1))) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) + do k = 0, p + do j = 0, n + do i = 0, m + dq%sf(i, j, k) = q%sf(i, j, k)*(dx(i + 1) - dx(i - 1)) & + + q%sf(i + 1, j, k)*(dx(i) + dx(i - 1)) & + - q%sf(i - 1, j, k)*(dx(i) + dx(i + 1)) + dq%sf(i, j, k) = dq%sf(i, j, k)/ & + ((dx(i) + dx(i - 1))*(dx(i) + dx(i + 1))) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() elseif (dir == 2) then ! Gradient in y dir. - #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - dq%sf(i, j, k) = q%sf(i, j, k)*(dy(j + 1) - dy(j - 1)) & - + q%sf(i, j + 1, k)*(dy(j) + dy(j - 1)) & - - q%sf(i, j - 1, k)*(dy(j) + dy(j + 1)) - dq%sf(i, j, k) = dq%sf(i, j, k)/ & - ((dy(j) + dy(j - 1))*(dy(j) + dy(j + 1))) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) + do k = 0, p + do j = 0, n + do i = 0, m + dq%sf(i, j, k) = q%sf(i, j, k)*(dy(j + 1) - dy(j - 1)) & + + q%sf(i, j + 1, k)*(dy(j) + dy(j - 1)) & + - q%sf(i, j - 1, k)*(dy(j) + dy(j + 1)) + dq%sf(i, j, k) = dq%sf(i, j, k)/ & + ((dy(j) + dy(j - 1))*(dy(j) + dy(j + 1))) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() elseif (dir == 3) then ! Gradient in z dir. - #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - dq%sf(i, j, k) = q%sf(i, j, k)*(dz(k + 1) - dz(k - 1)) & - + q%sf(i, j, k + 1)*(dz(k) + dz(k - 1)) & - - q%sf(i, j, k - 1)*(dz(k) + dz(k + 1)) - dq%sf(i, j, k) = dq%sf(i, j, k)/ & - ((dz(k) + dz(k - 1))*(dz(k) + dz(k + 1))) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) + do k = 0, p + do j = 0, n + do i = 0, m + dq%sf(i, j, k) = q%sf(i, j, k)*(dz(k + 1) - dz(k - 1)) & + + q%sf(i, j, k + 1)*(dz(k) + dz(k - 1)) & + - q%sf(i, j, k - 1)*(dz(k) + dz(k + 1)) + dq%sf(i, j, k) = dq%sf(i, j, k)/ & + ((dz(k) + dz(k - 1))*(dz(k) + dz(k + 1))) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_gradient_dir @@ -1511,20 +1511,20 @@ contains lag_void_max = 0._wp lag_void_avg = 0._wp lag_vol = 0._wp - #:call GPU_PARALLEL_LOOP(collapse=3, reduction='[[lag_vol, lag_void_avg], [lag_void_max]]', reductionOp='[+, MAX]', copy='[lag_vol, lag_void_avg, lag_void_max]') - do k = 0, p - do j = 0, n - do i = 0, m - lag_void_max = max(lag_void_max, 1._wp - q_beta%vf(1)%sf(i, j, k)) - call s_get_char_vol(i, j, k, volcell) - if ((1._wp - q_beta%vf(1)%sf(i, j, k)) > 5.0d-11) then - lag_void_avg = lag_void_avg + (1._wp - q_beta%vf(1)%sf(i, j, k))*volcell - lag_vol = lag_vol + volcell - end if - end do + $:GPU_PARALLEL_LOOP(private='[volcell]', collapse=3, reduction='[[lag_vol, lag_void_avg], [lag_void_max]]', reductionOp='[+, MAX]', copy='[lag_vol, lag_void_avg, lag_void_max]') + do k = 0, p + do j = 0, n + do i = 0, m + lag_void_max = max(lag_void_max, 1._wp - q_beta%vf(1)%sf(i, j, k)) + call s_get_char_vol(i, j, k, volcell) + if ((1._wp - q_beta%vf(1)%sf(i, j, k)) > 5.0d-11) then + lag_void_avg = lag_void_avg + (1._wp - q_beta%vf(1)%sf(i, j, k))*volcell + lag_vol = lag_vol + volcell + end if end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() #ifdef MFC_MPI if (num_procs > 1) then @@ -1707,15 +1707,15 @@ contains integer :: k - #:call GPU_PARALLEL_LOOP(reduction='[[Rmax_glb], [Rmin_glb]]', & + $:GPU_PARALLEL_LOOP(private='[k]', reduction='[[Rmax_glb], [Rmin_glb]]', & & reductionOp='[MAX, MIN]', copy='[Rmax_glb,Rmin_glb]') - do k = 1, nBubs - Rmax_glb = max(Rmax_glb, intfc_rad(k, 1)/bub_R0(k)) - Rmin_glb = min(Rmin_glb, intfc_rad(k, 1)/bub_R0(k)) - Rmax_stats(k) = max(Rmax_stats(k), intfc_rad(k, 1)/bub_R0(k)) - Rmin_stats(k) = min(Rmin_stats(k), intfc_rad(k, 1)/bub_R0(k)) - end do - #:endcall GPU_PARALLEL_LOOP + do k = 1, nBubs + Rmax_glb = max(Rmax_glb, intfc_rad(k, 1)/bub_R0(k)) + Rmin_glb = min(Rmin_glb, intfc_rad(k, 1)/bub_R0(k)) + Rmax_stats(k) = max(Rmax_stats(k), intfc_rad(k, 1)/bub_R0(k)) + Rmin_stats(k) = min(Rmin_stats(k), intfc_rad(k, 1)/bub_R0(k)) + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_calculate_lag_bubble_stats diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 218eaa6ea6..eff9a33ccc 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -55,42 +55,42 @@ contains real(wp), dimension(3) :: s_coord integer :: l - #:call GPU_PARALLEL_LOOP(private='[l,s_coord,cell]') - do l = 1, nBubs + $:GPU_PARALLEL_LOOP(private='[l,s_coord,cell]') + do l = 1, nBubs - volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp - s_coord(1:3) = lbk_s(l, 1:3, 2) - call s_get_cell(s_coord, cell) + volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp + s_coord(1:3) = lbk_s(l, 1:3, 2) + call s_get_cell(s_coord, cell) - strength_vol = volpart - strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) + strength_vol = volpart + strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) - if (num_dims == 2) then - Vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth - if (cyl_coord) Vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi - else - Vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) - end if + if (num_dims == 2) then + Vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth + if (cyl_coord) Vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi + else + Vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) + end if - !Update void fraction field - addFun1 = strength_vol/Vol - $:GPU_ATOMIC(atomic='update') - updatedvar%vf(1)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(1)%sf(cell(1), cell(2), cell(3)) + addFun1 + !Update void fraction field + addFun1 = strength_vol/Vol + $:GPU_ATOMIC(atomic='update') + updatedvar%vf(1)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(1)%sf(cell(1), cell(2), cell(3)) + addFun1 + + !Update time derivative of void fraction + addFun2 = strength_vel/Vol + $:GPU_ATOMIC(atomic='update') + updatedvar%vf(2)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(2)%sf(cell(1), cell(2), cell(3)) + addFun2 - !Update time derivative of void fraction - addFun2 = strength_vel/Vol + !Product of two smeared functions + !Update void fraction * time derivative of void fraction + if (lag_params%cluster_type >= 4) then + addFun3 = (strength_vol*strength_vel)/Vol $:GPU_ATOMIC(atomic='update') - updatedvar%vf(2)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(2)%sf(cell(1), cell(2), cell(3)) + addFun2 - - !Product of two smeared functions - !Update void fraction * time derivative of void fraction - if (lag_params%cluster_type >= 4) then - addFun3 = (strength_vol*strength_vel)/Vol - $:GPU_ATOMIC(atomic='update') - updatedvar%vf(5)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(5)%sf(cell(1), cell(2), cell(3)) + addFun3 - end if - end do - #:endcall GPU_PARALLEL_LOOP + updatedvar%vf(5)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(5)%sf(cell(1), cell(2), cell(3)) + addFun3 + end if + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_deltafunc @@ -121,82 +121,82 @@ contains smearGridz = smearGrid if (p == 0) smearGridz = 1 - #:call GPU_PARALLEL_LOOP(private='[nodecoord,l,s_coord,cell,center]', copyin='[smearGrid,smearGridz]') - do l = 1, nBubs - nodecoord(1:3) = 0 - center(1:3) = 0._wp - volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp - s_coord(1:3) = lbk_s(l, 1:3, 2) - center(1:2) = lbk_pos(l, 1:2, 2) - if (p > 0) center(3) = lbk_pos(l, 3, 2) - call s_get_cell(s_coord, cell) - call s_compute_stddsv(cell, volpart, stddsv) - - strength_vol = volpart - strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) - - $:GPU_LOOP(collapse=3,private='[cellaux,nodecoord]') - do i = 1, smearGrid - do j = 1, smearGrid - do k = 1, smearGridz - cellaux(1) = cell(1) + i - (mapCells + 1) - cellaux(2) = cell(2) + j - (mapCells + 1) - cellaux(3) = cell(3) + k - (mapCells + 1) - if (p == 0) cellaux(3) = 0 - - !Check if the cells intended to smear the bubbles in are in the computational domain - !and redefine the cells for symmetric boundary - call s_check_celloutside(cellaux, celloutside) - - if (.not. celloutside) then - - nodecoord(1) = x_cc(cellaux(1)) - nodecoord(2) = y_cc(cellaux(2)) - if (p > 0) nodecoord(3) = z_cc(cellaux(3)) - call s_applygaussian(center, cellaux, nodecoord, stddsv, 0._wp, func) - if (lag_params%cluster_type >= 4) call s_applygaussian(center, cellaux, nodecoord, stddsv, 1._wp, func2) - - ! Relocate cells for bubbles intersecting symmetric boundaries - if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == BC_REFLECTIVE)) then - call s_shift_cell_symmetric_bc(cellaux, cell) - end if - else - func = 0._wp - func2 = 0._wp - cellaux(1) = cell(1) - cellaux(2) = cell(2) - cellaux(3) = cell(3) - if (p == 0) cellaux(3) = 0 + $:GPU_PARALLEL_LOOP(private='[nodecoord,i,j,k,l,s_coord,cell,center]', copyin='[smearGrid,smearGridz]') + do l = 1, nBubs + nodecoord(1:3) = 0 + center(1:3) = 0._wp + volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp + s_coord(1:3) = lbk_s(l, 1:3, 2) + center(1:2) = lbk_pos(l, 1:2, 2) + if (p > 0) center(3) = lbk_pos(l, 3, 2) + call s_get_cell(s_coord, cell) + call s_compute_stddsv(cell, volpart, stddsv) + + strength_vol = volpart + strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) + + $:GPU_LOOP(collapse=3,private='[cellaux,nodecoord]') + do i = 1, smearGrid + do j = 1, smearGrid + do k = 1, smearGridz + cellaux(1) = cell(1) + i - (mapCells + 1) + cellaux(2) = cell(2) + j - (mapCells + 1) + cellaux(3) = cell(3) + k - (mapCells + 1) + if (p == 0) cellaux(3) = 0 + + !Check if the cells intended to smear the bubbles in are in the computational domain + !and redefine the cells for symmetric boundary + call s_check_celloutside(cellaux, celloutside) + + if (.not. celloutside) then + + nodecoord(1) = x_cc(cellaux(1)) + nodecoord(2) = y_cc(cellaux(2)) + if (p > 0) nodecoord(3) = z_cc(cellaux(3)) + call s_applygaussian(center, cellaux, nodecoord, stddsv, 0._wp, func) + if (lag_params%cluster_type >= 4) call s_applygaussian(center, cellaux, nodecoord, stddsv, 1._wp, func2) + + ! Relocate cells for bubbles intersecting symmetric boundaries + if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == BC_REFLECTIVE)) then + call s_shift_cell_symmetric_bc(cellaux, cell) end if - - !Update void fraction field - addFun1 = func*strength_vol - $:GPU_ATOMIC(atomic='update') - updatedvar%vf(1)%sf(cellaux(1), cellaux(2), cellaux(3)) = & - updatedvar%vf(1)%sf(cellaux(1), cellaux(2), cellaux(3)) & - + addFun1 - - !Update time derivative of void fraction - addFun2 = func*strength_vel + else + func = 0._wp + func2 = 0._wp + cellaux(1) = cell(1) + cellaux(2) = cell(2) + cellaux(3) = cell(3) + if (p == 0) cellaux(3) = 0 + end if + + !Update void fraction field + addFun1 = func*strength_vol + $:GPU_ATOMIC(atomic='update') + updatedvar%vf(1)%sf(cellaux(1), cellaux(2), cellaux(3)) = & + updatedvar%vf(1)%sf(cellaux(1), cellaux(2), cellaux(3)) & + + addFun1 + + !Update time derivative of void fraction + addFun2 = func*strength_vel + $:GPU_ATOMIC(atomic='update') + updatedvar%vf(2)%sf(cellaux(1), cellaux(2), cellaux(3)) = & + updatedvar%vf(2)%sf(cellaux(1), cellaux(2), cellaux(3)) & + + addFun2 + + !Product of two smeared functions + !Update void fraction * time derivative of void fraction + if (lag_params%cluster_type >= 4) then + addFun3 = func2*strength_vol*strength_vel $:GPU_ATOMIC(atomic='update') - updatedvar%vf(2)%sf(cellaux(1), cellaux(2), cellaux(3)) = & - updatedvar%vf(2)%sf(cellaux(1), cellaux(2), cellaux(3)) & - + addFun2 - - !Product of two smeared functions - !Update void fraction * time derivative of void fraction - if (lag_params%cluster_type >= 4) then - addFun3 = func2*strength_vol*strength_vel - $:GPU_ATOMIC(atomic='update') - updatedvar%vf(5)%sf(cellaux(1), cellaux(2), cellaux(3)) = & - updatedvar%vf(5)%sf(cellaux(1), cellaux(2), cellaux(3)) & - + addFun3 - end if - end do + updatedvar%vf(5)%sf(cellaux(1), cellaux(2), cellaux(3)) = & + updatedvar%vf(5)%sf(cellaux(1), cellaux(2), cellaux(3)) & + + addFun3 + end if end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_gaussian @@ -381,7 +381,7 @@ contains !> The purpose of this procedure is to calculate the characteristic cell volume !! @param cell Computational coordinates (x, y, z) !! @param Charvol Characteristic volume - elemental subroutine s_get_char_vol(cellx, celly, cellz, Charvol) + subroutine s_get_char_vol(cellx, celly, cellz, Charvol) $:GPU_ROUTINE(function_name='s_get_char_vol',parallelism='[seq]', & & cray_inline=True) diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 4df1c4fcf0..fec4fc2dcc 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -710,31 +710,31 @@ contains F_src_rs${XYZ}$_vf, & is1, is2, is3, idwbuff(2)%beg, idwbuff(3)%beg) - #:call GPU_PARALLEL_LOOP(collapse=3) - do i = 1, flux_cbc_index - do r = is3%beg, is3%end - do k = is2%beg, is2%end - flux_rs${XYZ}$_vf_l(0, k, r, i) = F_rs${XYZ}$_vf(0, k, r, i) & - + pi_coef_${XYZ}$ (0, 0, cbc_loc)* & - (F_rs${XYZ}$_vf(1, k, r, i) - & - F_rs${XYZ}$_vf(0, k, r, i)) - end do + $:GPU_PARALLEL_LOOP(private='[i,r,k]', collapse=3) + do i = 1, flux_cbc_index + do r = is3%beg, is3%end + do k = is2%beg, is2%end + flux_rs${XYZ}$_vf_l(0, k, r, i) = F_rs${XYZ}$_vf(0, k, r, i) & + + pi_coef_${XYZ}$ (0, 0, cbc_loc)* & + (F_rs${XYZ}$_vf(1, k, r, i) - & + F_rs${XYZ}$_vf(0, k, r, i)) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) - do i = advxb, advxe - do r = is3%beg, is3%end - do k = is2%beg, is2%end - flux_src_rs${XYZ}$_vf_l(0, k, r, i) = F_src_rs${XYZ}$_vf(0, k, r, i) + & - (F_src_rs${XYZ}$_vf(1, k, r, i) - & - F_src_rs${XYZ}$_vf(0, k, r, i)) & - *pi_coef_${XYZ}$ (0, 0, cbc_loc) - end do + $:GPU_PARALLEL_LOOP(private='[i,r,k]', collapse=3) + do i = advxb, advxe + do r = is3%beg, is3%end + do k = is2%beg, is2%end + flux_src_rs${XYZ}$_vf_l(0, k, r, i) = F_src_rs${XYZ}$_vf(0, k, r, i) + & + (F_src_rs${XYZ}$_vf(1, k, r, i) - & + F_src_rs${XYZ}$_vf(0, k, r, i)) & + *pi_coef_${XYZ}$ (0, 0, cbc_loc) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() ! PI4 of flux_rs_vf and flux_src_rs_vf at j = 1/2, 3/2 else @@ -743,390 +743,390 @@ contains F_src_rs${XYZ}$_vf, & is1, is2, is3, idwbuff(2)%beg, idwbuff(3)%beg) - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, flux_cbc_index - do j = 0, 1 - do r = is3%beg, is3%end - do k = is2%beg, is2%end - flux_rs${XYZ}$_vf_l(j, k, r, i) = F_rs${XYZ}$_vf(j, k, r, i) & - + pi_coef_${XYZ}$ (j, 0, cbc_loc)* & - (F_rs${XYZ}$_vf(3, k, r, i) - & - F_rs${XYZ}$_vf(2, k, r, i)) & - + pi_coef_${XYZ}$ (j, 1, cbc_loc)* & - (F_rs${XYZ}$_vf(2, k, r, i) - & - F_rs${XYZ}$_vf(1, k, r, i)) & - + pi_coef_${XYZ}$ (j, 2, cbc_loc)* & - (F_rs${XYZ}$_vf(1, k, r, i) - & - F_rs${XYZ}$_vf(0, k, r, i)) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,r,k]', collapse=4) + do i = 1, flux_cbc_index + do j = 0, 1 + do r = is3%beg, is3%end + do k = is2%beg, is2%end + flux_rs${XYZ}$_vf_l(j, k, r, i) = F_rs${XYZ}$_vf(j, k, r, i) & + + pi_coef_${XYZ}$ (j, 0, cbc_loc)* & + (F_rs${XYZ}$_vf(3, k, r, i) - & + F_rs${XYZ}$_vf(2, k, r, i)) & + + pi_coef_${XYZ}$ (j, 1, cbc_loc)* & + (F_rs${XYZ}$_vf(2, k, r, i) - & + F_rs${XYZ}$_vf(1, k, r, i)) & + + pi_coef_${XYZ}$ (j, 2, cbc_loc)* & + (F_rs${XYZ}$_vf(1, k, r, i) - & + F_rs${XYZ}$_vf(0, k, r, i)) end do end do end do - #:endcall GPU_PARALLEL_LOOP - - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = advxb, advxe - do j = 0, 1 - do r = is3%beg, is3%end - do k = is2%beg, is2%end - flux_src_rs${XYZ}$_vf_l(j, k, r, i) = F_src_rs${XYZ}$_vf(j, k, r, i) + & - (F_src_rs${XYZ}$_vf(3, k, r, i) - & - F_src_rs${XYZ}$_vf(2, k, r, i)) & - *pi_coef_${XYZ}$ (j, 0, cbc_loc) + & - (F_src_rs${XYZ}$_vf(2, k, r, i) - & - F_src_rs${XYZ}$_vf(1, k, r, i)) & - *pi_coef_${XYZ}$ (j, 1, cbc_loc) + & - (F_src_rs${XYZ}$_vf(1, k, r, i) - & - F_src_rs${XYZ}$_vf(0, k, r, i)) & - *pi_coef_${XYZ}$ (j, 2, cbc_loc) - end do + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(private='[i,j,r,k]', collapse=4) + do i = advxb, advxe + do j = 0, 1 + do r = is3%beg, is3%end + do k = is2%beg, is2%end + flux_src_rs${XYZ}$_vf_l(j, k, r, i) = F_src_rs${XYZ}$_vf(j, k, r, i) + & + (F_src_rs${XYZ}$_vf(3, k, r, i) - & + F_src_rs${XYZ}$_vf(2, k, r, i)) & + *pi_coef_${XYZ}$ (j, 0, cbc_loc) + & + (F_src_rs${XYZ}$_vf(2, k, r, i) - & + F_src_rs${XYZ}$_vf(1, k, r, i)) & + *pi_coef_${XYZ}$ (j, 1, cbc_loc) + & + (F_src_rs${XYZ}$_vf(1, k, r, i) - & + F_src_rs${XYZ}$_vf(0, k, r, i)) & + *pi_coef_${XYZ}$ (j, 2, cbc_loc) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if ! FD2 or FD4 of RHS at j = 0 - #:call GPU_PARALLEL_LOOP(collapse=2, private='[alpha_rho, vel, adv_local, mf, dvel_ds, dadv_ds, Re_cbc, dalpha_rho_ds,dvel_dt, dadv_dt, dalpha_rho_dt, L, lambda, Ys, dYs_dt, dYs_ds, h_k, Cp_i, Gamma_i, Xs]') - do r = is3%beg, is3%end - do k = is2%beg, is2%end + $:GPU_PARALLEL_LOOP(collapse=2, private='[r,k,alpha_rho, vel, adv_local, mf, dvel_ds, dadv_ds, Re_cbc, dalpha_rho_ds,dvel_dt, dadv_dt, dalpha_rho_dt, L, lambda, Ys, dYs_dt, dYs_ds, h_k, Cp_i, Gamma_i, Xs]') + do r = is3%beg, is3%end + do k = is2%beg, is2%end - ! Transferring the Primitive Variables - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - alpha_rho(i) = q_prim_rs${XYZ}$_vf(0, k, r, i) - end do + ! Transferring the Primitive Variables + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + alpha_rho(i) = q_prim_rs${XYZ}$_vf(0, k, r, i) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel(i) = q_prim_rs${XYZ}$_vf(0, k, r, contxe + i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel(i) = q_prim_rs${XYZ}$_vf(0, k, r, contxe + i) + end do - vel_K_sum = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_K_sum = vel_K_sum + vel(i)**2._wp - end do + vel_K_sum = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_K_sum = vel_K_sum + vel(i)**2._wp + end do + + pres = q_prim_rs${XYZ}$_vf(0, k, r, E_idx) - pres = q_prim_rs${XYZ}$_vf(0, k, r, E_idx) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, advxe - E_idx + adv_local(i) = q_prim_rs${XYZ}$_vf(0, k, r, E_idx + i) + end do + + if (bubbles_euler) then + call s_convert_species_to_mixture_variables_bubbles_acc(rho, gamma, pi_inf, qv, adv_local, alpha_rho, Re_cbc) + else + call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, adv_local, alpha_rho, Re_cbc) + end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + mf(i) = alpha_rho(i)/rho + end do + + if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, advxe - E_idx - adv_local(i) = q_prim_rs${XYZ}$_vf(0, k, r, E_idx + i) + do i = chemxb, chemxe + Ys(i - chemxb + 1) = q_prim_rs${XYZ}$_vf(0, k, r, i) end do - if (bubbles_euler) then - call s_convert_species_to_mixture_variables_bubbles_acc(rho, gamma, pi_inf, qv, adv_local, alpha_rho, Re_cbc) - else - call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, adv_local, alpha_rho, Re_cbc) + call get_mixture_molecular_weight(Ys, Mw) + R_gas = gas_constant/Mw + T = pres/rho/R_gas + call get_mixture_specific_heat_cp_mass(T, Ys, Cp) + call get_mixture_energy_mass(T, Ys, e_mix) + E = rho*e_mix + 5.e-1_wp*rho*vel_K_sum + if (chem_params%gamma_method == 1) then + !> gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. + call get_mole_fractions(Mw, Ys, Xs) + call get_species_specific_heats_r(T, Cp_i) + Gamma_i = Cp_i/(Cp_i - 1.0_wp) + gamma = sum(Xs(:)/(Gamma_i(:) - 1.0_wp)) + else if (chem_params%gamma_method == 2) then + !> gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. + call get_mixture_specific_heat_cv_mass(T, Ys, Cv) + gamma = 1.0_wp/(Cp/Cv - 1.0_wp) end if + else + E = gamma*pres + pi_inf + 5.e-1_wp*rho*vel_K_sum + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - mf(i) = alpha_rho(i)/rho - end do + H = (E + pres)/rho - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Ys(i - chemxb + 1) = q_prim_rs${XYZ}$_vf(0, k, r, i) - end do + ! Compute mixture sound speed + call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, adv_local, vel_K_sum, 0._wp, c) - call get_mixture_molecular_weight(Ys, Mw) - R_gas = gas_constant/Mw - T = pres/rho/R_gas - call get_mixture_specific_heat_cp_mass(T, Ys, Cp) - call get_mixture_energy_mass(T, Ys, e_mix) - E = rho*e_mix + 5.e-1_wp*rho*vel_K_sum - if (chem_params%gamma_method == 1) then - !> gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. - call get_mole_fractions(Mw, Ys, Xs) - call get_species_specific_heats_r(T, Cp_i) - Gamma_i = Cp_i/(Cp_i - 1.0_wp) - gamma = sum(Xs(:)/(Gamma_i(:) - 1.0_wp)) - else if (chem_params%gamma_method == 2) then - !> gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. - call get_mixture_specific_heat_cv_mass(T, Ys, Cv) - gamma = 1.0_wp/(Cp/Cv - 1.0_wp) - end if - else - E = gamma*pres + pi_inf + 5.e-1_wp*rho*vel_K_sum - end if + ! First-Order Spatial Derivatives of Primitive Variables + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + dalpha_rho_ds(i) = 0._wp + end do - H = (E + pres)/rho + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + dvel_ds(i) = 0._wp + end do + + dpres_ds = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, advxe - E_idx + dadv_ds(i) = 0._wp + end do - ! Compute mixture sound speed - call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, adv_local, vel_K_sum, 0._wp, c) + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_species + dYs_ds(i) = 0._wp + end do + end if - ! First-Order Spatial Derivatives of Primitive Variables + $:GPU_LOOP(parallelism='[seq]') + do j = 0, buff_size $:GPU_LOOP(parallelism='[seq]') do i = 1, contxe - dalpha_rho_ds(i) = 0._wp + dalpha_rho_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, i)* & + fd_coef_${XYZ}$ (j, cbc_loc) + & + dalpha_rho_ds(i) end do - $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - dvel_ds(i) = 0._wp + dvel_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, contxe + i)* & + fd_coef_${XYZ}$ (j, cbc_loc) + & + dvel_ds(i) end do - dpres_ds = 0._wp + dpres_ds = q_prim_rs${XYZ}$_vf(j, k, r, E_idx)* & + fd_coef_${XYZ}$ (j, cbc_loc) + & + dpres_ds $:GPU_LOOP(parallelism='[seq]') do i = 1, advxe - E_idx - dadv_ds(i) = 0._wp + dadv_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, E_idx + i)* & + fd_coef_${XYZ}$ (j, cbc_loc) + & + dadv_ds(i) end do if (chemistry) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_species - dYs_ds(i) = 0._wp + dYs_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, chemxb - 1 + i)* & + fd_coef_${XYZ}$ (j, cbc_loc) + & + dYs_ds(i) end do end if + end do - $:GPU_LOOP(parallelism='[seq]') - do j = 0, buff_size - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - dalpha_rho_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, i)* & - fd_coef_${XYZ}$ (j, cbc_loc) + & - dalpha_rho_ds(i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - dvel_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, contxe + i)* & - fd_coef_${XYZ}$ (j, cbc_loc) + & - dvel_ds(i) - end do - - dpres_ds = q_prim_rs${XYZ}$_vf(j, k, r, E_idx)* & - fd_coef_${XYZ}$ (j, cbc_loc) + & - dpres_ds + ! First-Order Temporal Derivatives of Primitive Variables + lambda(1) = vel(dir_idx(1)) - c + lambda(2) = vel(dir_idx(1)) + lambda(3) = vel(dir_idx(1)) + c + + Ma = vel(dir_idx(1))/c + + if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SLIP_WALL) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SLIP_WALL)) then + call s_compute_slip_wall_L(lambda, L, rho, c, dpres_ds, dvel_ds) + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_BUFFER) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_BUFFER)) then + call s_compute_nonreflecting_subsonic_buffer_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_INFLOW) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_INFLOW)) then + call s_compute_nonreflecting_subsonic_inflow_L(lambda, L, rho, c, dpres_ds, dvel_ds) + ! Add GRCBC for Subsonic Inflow + if (bc_${XYZ}$%grcbc_in) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, advxe - E_idx - dadv_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, E_idx + i)* & - fd_coef_${XYZ}$ (j, cbc_loc) + & - dadv_ds(i) + do i = 2, momxb + L(2) = c**3._wp*Ma*(alpha_rho(i - 1) - alpha_rho_in(i - 1, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) - c*Ma*(pres - pres_in(${CBC_DIR}$))/Del_in(${CBC_DIR}$) end do - - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - dYs_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, chemxb - 1 + i)* & - fd_coef_${XYZ}$ (j, cbc_loc) + & - dYs_ds(i) - end do - end if - end do - - ! First-Order Temporal Derivatives of Primitive Variables - lambda(1) = vel(dir_idx(1)) - c - lambda(2) = vel(dir_idx(1)) - lambda(3) = vel(dir_idx(1)) + c - - Ma = vel(dir_idx(1))/c - - if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SLIP_WALL) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SLIP_WALL)) then - call s_compute_slip_wall_L(lambda, L, rho, c, dpres_ds, dvel_ds) - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_BUFFER) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_BUFFER)) then - call s_compute_nonreflecting_subsonic_buffer_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_INFLOW) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_INFLOW)) then - call s_compute_nonreflecting_subsonic_inflow_L(lambda, L, rho, c, dpres_ds, dvel_ds) - ! Add GRCBC for Subsonic Inflow - if (bc_${XYZ}$%grcbc_in) then - $:GPU_LOOP(parallelism='[seq]') - do i = 2, momxb - L(2) = c**3._wp*Ma*(alpha_rho(i - 1) - alpha_rho_in(i - 1, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) - c*Ma*(pres - pres_in(${CBC_DIR}$))/Del_in(${CBC_DIR}$) - end do - if (n > 0) then - L(momxb + 1) = c*Ma*(vel(dir_idx(2)) - vel_in(${CBC_DIR}$, dir_idx(2)))/Del_in(${CBC_DIR}$) - if (p > 0) then - L(momxb + 2) = c*Ma*(vel(dir_idx(3)) - vel_in(${CBC_DIR}$, dir_idx(3)))/Del_in(${CBC_DIR}$) - end if + if (n > 0) then + L(momxb + 1) = c*Ma*(vel(dir_idx(2)) - vel_in(${CBC_DIR}$, dir_idx(2)))/Del_in(${CBC_DIR}$) + if (p > 0) then + L(momxb + 2) = c*Ma*(vel(dir_idx(3)) - vel_in(${CBC_DIR}$, dir_idx(3)))/Del_in(${CBC_DIR}$) end if - $:GPU_LOOP(parallelism='[seq]') - do i = E_idx, advxe - 1 - L(i) = c*Ma*(adv_local(i + 1 - E_idx) - alpha_in(i + 1 - E_idx, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) - end do - L(advxe) = rho*c**2._wp*(1._wp + Ma)*(vel(dir_idx(1)) + vel_in(${CBC_DIR}$, dir_idx(1))*sign(1, cbc_loc))/Del_in(${CBC_DIR}$) + c*(1._wp + Ma)*(pres - pres_in(${CBC_DIR}$))/Del_in(${CBC_DIR}$) end if - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_OUTFLOW) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_OUTFLOW)) then - call s_compute_nonreflecting_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) - ! Add GRCBC for Subsonic Outflow (Pressure) - if (bc_${XYZ}$%grcbc_out) then - L(advxe) = c*(1._wp - Ma)*(pres - pres_out(${CBC_DIR}$))/Del_out(${CBC_DIR}$) - - ! Add GRCBC for Subsonic Outflow (Normal Velocity) - if (bc_${XYZ}$%grcbc_vel_out) then - L(advxe) = L(advxe) + rho*c**2._wp*(1._wp - Ma)*(vel(dir_idx(1)) + vel_out(${CBC_DIR}$, dir_idx(1))*sign(1, cbc_loc))/Del_out(${CBC_DIR}$) - end if + $:GPU_LOOP(parallelism='[seq]') + do i = E_idx, advxe - 1 + L(i) = c*Ma*(adv_local(i + 1 - E_idx) - alpha_in(i + 1 - E_idx, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) + end do + L(advxe) = rho*c**2._wp*(1._wp + Ma)*(vel(dir_idx(1)) + vel_in(${CBC_DIR}$, dir_idx(1))*sign(1, cbc_loc))/Del_in(${CBC_DIR}$) + c*(1._wp + Ma)*(pres - pres_in(${CBC_DIR}$))/Del_in(${CBC_DIR}$) + end if + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_OUTFLOW) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_OUTFLOW)) then + call s_compute_nonreflecting_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) + ! Add GRCBC for Subsonic Outflow (Pressure) + if (bc_${XYZ}$%grcbc_out) then + L(advxe) = c*(1._wp - Ma)*(pres - pres_out(${CBC_DIR}$))/Del_out(${CBC_DIR}$) + + ! Add GRCBC for Subsonic Outflow (Normal Velocity) + if (bc_${XYZ}$%grcbc_vel_out) then + L(advxe) = L(advxe) + rho*c**2._wp*(1._wp - Ma)*(vel(dir_idx(1)) + vel_out(${CBC_DIR}$, dir_idx(1))*sign(1, cbc_loc))/Del_out(${CBC_DIR}$) end if - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_FF_SUB_OUTFLOW) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_FF_SUB_OUTFLOW)) then - call s_compute_force_free_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_CP_SUB_OUTFLOW) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_CP_SUB_OUTFLOW)) then - call s_compute_constant_pressure_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SUP_INFLOW) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SUP_INFLOW)) then - call s_compute_supersonic_inflow_L(L) - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SUP_OUTFLOW) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SUP_OUTFLOW)) then - call s_compute_supersonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) end if + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_FF_SUB_OUTFLOW) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_FF_SUB_OUTFLOW)) then + call s_compute_force_free_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_CP_SUB_OUTFLOW) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_CP_SUB_OUTFLOW)) then + call s_compute_constant_pressure_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SUP_INFLOW) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SUP_INFLOW)) then + call s_compute_supersonic_inflow_L(L) + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SUP_OUTFLOW) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SUP_OUTFLOW)) then + call s_compute_supersonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) + end if + + ! Be careful about the cylindrical coordinate! + if (cyl_coord .and. cbc_dir == 2 .and. cbc_loc == 1) then + dpres_dt = -5.e-1_wp*(L(advxe) + L(1)) + rho*c*c*vel(dir_idx(1)) & + /y_cc(n) + else + dpres_dt = -5.e-1_wp*(L(advxe) + L(1)) + end if + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + dalpha_rho_dt(i) = & + -(L(i + 1) - mf(i)*dpres_dt)/(c*c) + end do - ! Be careful about the cylindrical coordinate! - if (cyl_coord .and. cbc_dir == 2 .and. cbc_loc == 1) then - dpres_dt = -5.e-1_wp*(L(advxe) + L(1)) + rho*c*c*vel(dir_idx(1)) & - /y_cc(n) - else - dpres_dt = -5.e-1_wp*(L(advxe) + L(1)) - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + dvel_dt(dir_idx(i)) = dir_flg(dir_idx(i))* & + (L(1) - L(advxe))/(2._wp*rho*c) + & + (dir_flg(dir_idx(i)) - 1._wp)* & + L(momxb + i - 1) + end do + + vel_dv_dt_sum = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_dv_dt_sum = vel_dv_dt_sum + vel(i)*dvel_dt(i) + end do + if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - dalpha_rho_dt(i) = & - -(L(i + 1) - mf(i)*dpres_dt)/(c*c) + do i = 1, num_species + dYs_dt(i) = -1._wp*L(chemxb + i - 1) end do + end if + ! The treatment of void fraction source is unclear + if (cyl_coord .and. cbc_dir == 2 .and. cbc_loc == 1) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - dvel_dt(dir_idx(i)) = dir_flg(dir_idx(i))* & - (L(1) - L(advxe))/(2._wp*rho*c) + & - (dir_flg(dir_idx(i)) - 1._wp)* & - L(momxb + i - 1) + do i = 1, advxe - E_idx + dadv_dt(i) = -L(momxe + i) !+ adv_local(i) * vel(dir_idx(1))/y_cc(n) end do - - vel_dv_dt_sum = 0._wp + else $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_dv_dt_sum = vel_dv_dt_sum + vel(i)*dvel_dt(i) + do i = 1, advxe - E_idx + dadv_dt(i) = -L(momxe + i) end do + end if - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - dYs_dt(i) = -1._wp*L(chemxb + i - 1) - end do - end if + drho_dt = 0._wp; dgamma_dt = 0._wp; dpi_inf_dt = 0._wp; dqv_dt = 0._wp - ! The treatment of void fraction source is unclear - if (cyl_coord .and. cbc_dir == 2 .and. cbc_loc == 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, advxe - E_idx - dadv_dt(i) = -L(momxe + i) !+ adv_local(i) * vel(dir_idx(1))/y_cc(n) - end do - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, advxe - E_idx - dadv_dt(i) = -L(momxe + i) - end do - end if + if (model_eqns == 1) then + drho_dt = dalpha_rho_dt(1) + dgamma_dt = dadv_dt(1) + dpi_inf_dt = dadv_dt(2) + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + drho_dt = drho_dt + dalpha_rho_dt(i) + dgamma_dt = dgamma_dt + dadv_dt(i)*gammas(i) + dpi_inf_dt = dpi_inf_dt + dadv_dt(i)*pi_infs(i) + dqv_dt = dqv_dt + dalpha_rho_dt(i)*qvs(i) + end do + end if - drho_dt = 0._wp; dgamma_dt = 0._wp; dpi_inf_dt = 0._wp; dqv_dt = 0._wp + ! flux_rs_vf_l and flux_src_rs_vf_l at j = -1/2 + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) & + + ds(0)*dalpha_rho_dt(i) + end do - if (model_eqns == 1) then - drho_dt = dalpha_rho_dt(1) - dgamma_dt = dadv_dt(1) - dpi_inf_dt = dadv_dt(2) - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - drho_dt = drho_dt + dalpha_rho_dt(i) - dgamma_dt = dgamma_dt + dadv_dt(i)*gammas(i) - dpi_inf_dt = dpi_inf_dt + dadv_dt(i)*pi_infs(i) - dqv_dt = dqv_dt + dalpha_rho_dt(i)*qvs(i) - end do - end if + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, momxe + flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) & + + ds(0)*(vel(i - contxe)*drho_dt & + + rho*dvel_dt(i - contxe)) + end do - ! flux_rs_vf_l and flux_src_rs_vf_l at j = -1/2 + if (chemistry) then + ! Evolution of LODI equation of energy for real gases adjusted to perfect gas, doi:10.1006/jcph.2002.6990 + call get_species_enthalpies_rt(T, h_k) + sum_Enthalpies = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) & - + ds(0)*dalpha_rho_dt(i) + do i = 1, num_species + #:block UNDEF_AMD + h_k(i) = h_k(i)*gas_constant/molecular_weights(i)*T + sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights(i)*Cp/R_gas)*dYs_dt(i) + #:endblock UNDEF_AMD + + #:block DEF_AMD + h_k(i) = h_k(i)*gas_constant/molecular_weights_nonparameter(i)*T + sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights_nonparameter(i)*Cp/R_gas)*dYs_dt(i) + #:endblock DEF_AMD end do - + flux_rs${XYZ}$_vf_l(-1, k, r, E_idx) = flux_rs${XYZ}$_vf_l(0, k, r, E_idx) & + + ds(0)*((E/rho + pres/rho)*drho_dt + rho*vel_dv_dt_sum + Cp*T*L(2)/(c*c) + sum_Enthalpies) $:GPU_LOOP(parallelism='[seq]') - do i = momxb, momxe - flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) & - + ds(0)*(vel(i - contxe)*drho_dt & - + rho*dvel_dt(i - contxe)) + do i = 1, num_species + flux_rs${XYZ}$_vf_l(-1, k, r, i - 1 + chemxb) = flux_rs${XYZ}$_vf_l(0, k, r, chemxb + i - 1) & + + ds(0)*(drho_dt*Ys(i) + rho*dYs_dt(i)) + end do + else + flux_rs${XYZ}$_vf_l(-1, k, r, E_idx) = flux_rs${XYZ}$_vf_l(0, k, r, E_idx) & + + ds(0)*(pres*dgamma_dt & + + gamma*dpres_dt & + + dpi_inf_dt & + + dqv_dt & + + rho*vel_dv_dt_sum & + + 5.e-1_wp*drho_dt*vel_K_sum) + end if + + if (riemann_solver == 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf_l(-1, k, r, i) = 0._wp end do - if (chemistry) then - ! Evolution of LODI equation of energy for real gases adjusted to perfect gas, doi:10.1006/jcph.2002.6990 - call get_species_enthalpies_rt(T, h_k) - sum_Enthalpies = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - #:block UNDEF_AMD - h_k(i) = h_k(i)*gas_constant/molecular_weights(i)*T - sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights(i)*Cp/R_gas)*dYs_dt(i) - #:endblock UNDEF_AMD - - #:block DEF_AMD - h_k(i) = h_k(i)*gas_constant/molecular_weights_nonparameter(i)*T - sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights_nonparameter(i)*Cp/R_gas)*dYs_dt(i) - #:endblock DEF_AMD - end do - flux_rs${XYZ}$_vf_l(-1, k, r, E_idx) = flux_rs${XYZ}$_vf_l(0, k, r, E_idx) & - + ds(0)*((E/rho + pres/rho)*drho_dt + rho*vel_dv_dt_sum + Cp*T*L(2)/(c*c) + sum_Enthalpies) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - flux_rs${XYZ}$_vf_l(-1, k, r, i - 1 + chemxb) = flux_rs${XYZ}$_vf_l(0, k, r, chemxb + i - 1) & - + ds(0)*(drho_dt*Ys(i) + rho*dYs_dt(i)) - end do - else - flux_rs${XYZ}$_vf_l(-1, k, r, E_idx) = flux_rs${XYZ}$_vf_l(0, k, r, E_idx) & - + ds(0)*(pres*dgamma_dt & - + gamma*dpres_dt & - + dpi_inf_dt & - + dqv_dt & - + rho*vel_dv_dt_sum & - + 5.e-1_wp*drho_dt*vel_K_sum) - end if - - if (riemann_solver == 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf_l(-1, k, r, i) = 0._wp - end do - - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_src_rs${XYZ}$_vf_l(-1, k, r, i) = & - 1._wp/max(abs(vel(dir_idx(1))), sgm_eps) & - *sign(1._wp, vel(dir_idx(1))) & - *(flux_rs${XYZ}$_vf_l(0, k, r, i) & - + vel(dir_idx(1)) & - *flux_src_rs${XYZ}$_vf_l(0, k, r, i) & - + ds(0)*dadv_dt(i - E_idx)) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_src_rs${XYZ}$_vf_l(-1, k, r, i) = & + 1._wp/max(abs(vel(dir_idx(1))), sgm_eps) & + *sign(1._wp, vel(dir_idx(1))) & + *(flux_rs${XYZ}$_vf_l(0, k, r, i) & + + vel(dir_idx(1)) & + *flux_src_rs${XYZ}$_vf_l(0, k, r, i) & + + ds(0)*dadv_dt(i - E_idx)) + end do - else + else - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) + & - ds(0)*dadv_dt(i - E_idx) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) + & + ds(0)*dadv_dt(i - E_idx) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_src_rs${XYZ}$_vf_l(-1, k, r, i) = flux_src_rs${XYZ}$_vf_l(0, k, r, i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_src_rs${XYZ}$_vf_l(-1, k, r, i) = flux_src_rs${XYZ}$_vf_l(0, k, r, i) + end do - end if - ! END: flux_rs_vf_l and flux_src_rs_vf_l at j = -1/2 + end if + ! END: flux_rs_vf_l and flux_src_rs_vf_l at j = -1/2 - end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if #:endfor @@ -1187,81 +1187,81 @@ contains ! Reshaping Inputted Data in x-direction if (cbc_dir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = 0, buff_size - q_prim_rsx_vf(j, k, r, i) = & - q_prim_vf(i)%sf(dj*(m - 2*j) + j, k, r) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) + do i = 1, sys_size + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = 0, buff_size + q_prim_rsx_vf(j, k, r, i) = & + q_prim_vf(i)%sf(dj*(m - 2*j) + j, k, r) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = 0, buff_size + q_prim_rsx_vf(j, k, r, momxb) = & + q_prim_vf(momxb)%sf(dj*(m - 2*j) + j, k, r)* & + sign(1._wp, -1._wp*cbc_loc) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) + do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end - do j = 0, buff_size - q_prim_rsx_vf(j, k, r, momxb) = & - q_prim_vf(momxb)%sf(dj*(m - 2*j) + j, k, r)* & + do j = -1, buff_size + flux_rsx_vf_l(j, k, r, i) = & + flux_vf(i)%sf(dj*((m - 1) - 2*j) + j, k, r)* & sign(1._wp, -1._wp*cbc_loc) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_rsx_vf_l(j, k, r, momxb) = & + flux_vf(momxb)%sf(dj*((m - 1) - 2*j) + j, k, r) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, flux_cbc_index + if (riemann_solver == 1) then + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) + do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size - flux_rsx_vf_l(j, k, r, i) = & - flux_vf(i)%sf(dj*((m - 1) - 2*j) + j, k, r)* & - sign(1._wp, -1._wp*cbc_loc) + flux_src_rsx_vf_l(j, k, r, i) = & + flux_src_vf(i)%sf(dj*((m - 1) - 2*j) + j, k, r) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP - - #:call GPU_PARALLEL_LOOP(collapse=3) + $:END_GPU_PARALLEL_LOOP() + else + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size - flux_rsx_vf_l(j, k, r, momxb) = & - flux_vf(momxb)%sf(dj*((m - 1) - 2*j) + j, k, r) + flux_src_rsx_vf_l(j, k, r, advxb) = & + flux_src_vf(advxb)%sf(dj*((m - 1) - 2*j) + j, k, r)* & + sign(1._wp, -1._wp*cbc_loc) end do end do end do - #:endcall GPU_PARALLEL_LOOP - - if (riemann_solver == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = advxb, advxe - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_rsx_vf_l(j, k, r, i) = & - flux_src_vf(i)%sf(dj*((m - 1) - 2*j) + j, k, r) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - else - #:call GPU_PARALLEL_LOOP(collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_rsx_vf_l(j, k, r, advxb) = & - flux_src_vf(advxb)%sf(dj*((m - 1) - 2*j) + j, k, r)* & - sign(1._wp, -1._wp*cbc_loc) - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if ! END: Reshaping Inputted Data in x-direction @@ -1269,81 +1269,81 @@ contains ! Reshaping Inputted Data in y-direction elseif (cbc_dir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = 0, buff_size - q_prim_rsy_vf(j, k, r, i) = & - q_prim_vf(i)%sf(k, dj*(n - 2*j) + j, r) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) + do i = 1, sys_size + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = 0, buff_size + q_prim_rsy_vf(j, k, r, i) = & + q_prim_vf(i)%sf(k, dj*(n - 2*j) + j, r) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = 0, buff_size + q_prim_rsy_vf(j, k, r, momxb + 1) = & + q_prim_vf(momxb + 1)%sf(k, dj*(n - 2*j) + j, r)* & + sign(1._wp, -1._wp*cbc_loc) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) + do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end - do j = 0, buff_size - q_prim_rsy_vf(j, k, r, momxb + 1) = & - q_prim_vf(momxb + 1)%sf(k, dj*(n - 2*j) + j, r)* & + do j = -1, buff_size + flux_rsy_vf_l(j, k, r, i) = & + flux_vf(i)%sf(k, dj*((n - 1) - 2*j) + j, r)* & sign(1._wp, -1._wp*cbc_loc) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_rsy_vf_l(j, k, r, momxb + 1) = & + flux_vf(momxb + 1)%sf(k, dj*((n - 1) - 2*j) + j, r) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, flux_cbc_index + if (riemann_solver == 1) then + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) + do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size - flux_rsy_vf_l(j, k, r, i) = & - flux_vf(i)%sf(k, dj*((n - 1) - 2*j) + j, r)* & - sign(1._wp, -1._wp*cbc_loc) + flux_src_rsy_vf_l(j, k, r, i) = & + flux_src_vf(i)%sf(k, dj*((n - 1) - 2*j) + j, r) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP - - #:call GPU_PARALLEL_LOOP(collapse=3) + $:END_GPU_PARALLEL_LOOP() + else + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size - flux_rsy_vf_l(j, k, r, momxb + 1) = & - flux_vf(momxb + 1)%sf(k, dj*((n - 1) - 2*j) + j, r) + flux_src_rsy_vf_l(j, k, r, advxb) = & + flux_src_vf(advxb)%sf(k, dj*((n - 1) - 2*j) + j, r)* & + sign(1._wp, -1._wp*cbc_loc) end do end do end do - #:endcall GPU_PARALLEL_LOOP - - if (riemann_solver == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = advxb, advxe - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_rsy_vf_l(j, k, r, i) = & - flux_src_vf(i)%sf(k, dj*((n - 1) - 2*j) + j, r) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - else - #:call GPU_PARALLEL_LOOP(collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_rsy_vf_l(j, k, r, advxb) = & - flux_src_vf(advxb)%sf(k, dj*((n - 1) - 2*j) + j, r)* & - sign(1._wp, -1._wp*cbc_loc) - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if ! END: Reshaping Inputted Data in y-direction @@ -1351,81 +1351,81 @@ contains ! Reshaping Inputted Data in z-direction else - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = 0, buff_size - q_prim_rsz_vf(j, k, r, i) = & - q_prim_vf(i)%sf(r, k, dj*(p - 2*j) + j) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) + do i = 1, sys_size + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = 0, buff_size + q_prim_rsz_vf(j, k, r, i) = & + q_prim_vf(i)%sf(r, k, dj*(p - 2*j) + j) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = 0, buff_size + q_prim_rsz_vf(j, k, r, momxe) = & + q_prim_vf(momxe)%sf(r, k, dj*(p - 2*j) + j)* & + sign(1._wp, -1._wp*cbc_loc) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) + do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end - do j = 0, buff_size - q_prim_rsz_vf(j, k, r, momxe) = & - q_prim_vf(momxe)%sf(r, k, dj*(p - 2*j) + j)* & + do j = -1, buff_size + flux_rsz_vf_l(j, k, r, i) = & + flux_vf(i)%sf(r, k, dj*((p - 1) - 2*j) + j)* & sign(1._wp, -1._wp*cbc_loc) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_rsz_vf_l(j, k, r, momxe) = & + flux_vf(momxe)%sf(r, k, dj*((p - 1) - 2*j) + j) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, flux_cbc_index + if (riemann_solver == 1) then + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) + do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size - flux_rsz_vf_l(j, k, r, i) = & - flux_vf(i)%sf(r, k, dj*((p - 1) - 2*j) + j)* & - sign(1._wp, -1._wp*cbc_loc) + flux_src_rsz_vf_l(j, k, r, i) = & + flux_src_vf(i)%sf(r, k, dj*((p - 1) - 2*j) + j) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP - - #:call GPU_PARALLEL_LOOP(collapse=3) + $:END_GPU_PARALLEL_LOOP() + else + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size - flux_rsz_vf_l(j, k, r, momxe) = & - flux_vf(momxe)%sf(r, k, dj*((p - 1) - 2*j) + j) + flux_src_rsz_vf_l(j, k, r, advxb) = & + flux_src_vf(advxb)%sf(r, k, dj*((p - 1) - 2*j) + j)* & + sign(1._wp, -1._wp*cbc_loc) end do end do end do - #:endcall GPU_PARALLEL_LOOP - - if (riemann_solver == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = advxb, advxe - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_rsz_vf_l(j, k, r, i) = & - flux_src_vf(i)%sf(r, k, dj*((p - 1) - 2*j) + j) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - else - #:call GPU_PARALLEL_LOOP(collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_rsz_vf_l(j, k, r, advxb) = & - flux_src_vf(advxb)%sf(r, k, dj*((p - 1) - 2*j) + j)* & - sign(1._wp, -1._wp*cbc_loc) - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if @@ -1455,111 +1455,111 @@ contains ! Reshaping Outputted Data in x-direction if (cbc_dir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, flux_cbc_index - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_vf(i)%sf(dj*((m - 1) - 2*j) + j, k, r) = & - flux_rsx_vf_l(j, k, r, i)* & - sign(1._wp, -1._wp*cbc_loc) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) + do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size - flux_vf(momxb)%sf(dj*((m - 1) - 2*j) + j, k, r) = & - flux_rsx_vf_l(j, k, r, momxb) + flux_vf(i)%sf(dj*((m - 1) - 2*j) + j, k, r) = & + flux_rsx_vf_l(j, k, r, i)* & + sign(1._wp, -1._wp*cbc_loc) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_vf(momxb)%sf(dj*((m - 1) - 2*j) + j, k, r) = & + flux_rsx_vf_l(j, k, r, momxb) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = advxb, advxe - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_vf(i)%sf(dj*((m - 1) - 2*j) + j, k, r) = & - flux_src_rsx_vf_l(j, k, r, i) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - else - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) + do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size - flux_src_vf(advxb)%sf(dj*((m - 1) - 2*j) + j, k, r) = & - flux_src_rsx_vf_l(j, k, r, advxb)* & - sign(1._wp, -1._wp*cbc_loc) + flux_src_vf(i)%sf(dj*((m - 1) - 2*j) + j, k, r) = & + flux_src_rsx_vf_l(j, k, r, i) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() + else + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_vf(advxb)%sf(dj*((m - 1) - 2*j) + j, k, r) = & + flux_src_rsx_vf_l(j, k, r, advxb)* & + sign(1._wp, -1._wp*cbc_loc) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() end if ! END: Reshaping Outputted Data in x-direction ! Reshaping Outputted Data in y-direction elseif (cbc_dir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, flux_cbc_index - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_vf(i)%sf(k, dj*((n - 1) - 2*j) + j, r) = & - flux_rsy_vf_l(j, k, r, i)* & - sign(1._wp, -1._wp*cbc_loc) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) + do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size - flux_vf(momxb + 1)%sf(k, dj*((n - 1) - 2*j) + j, r) = & - flux_rsy_vf_l(j, k, r, momxb + 1) + flux_vf(i)%sf(k, dj*((n - 1) - 2*j) + j, r) = & + flux_rsy_vf_l(j, k, r, i)* & + sign(1._wp, -1._wp*cbc_loc) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_vf(momxb + 1)%sf(k, dj*((n - 1) - 2*j) + j, r) = & + flux_rsy_vf_l(j, k, r, momxb + 1) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = advxb, advxe - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_vf(i)%sf(k, dj*((n - 1) - 2*j) + j, r) = & - flux_src_rsy_vf_l(j, k, r, i) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - else - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) + do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size - flux_src_vf(advxb)%sf(k, dj*((n - 1) - 2*j) + j, r) = & - flux_src_rsy_vf_l(j, k, r, advxb)* & - sign(1._wp, -1._wp*cbc_loc) + flux_src_vf(i)%sf(k, dj*((n - 1) - 2*j) + j, r) = & + flux_src_rsy_vf_l(j, k, r, i) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() + else + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_vf(advxb)%sf(k, dj*((n - 1) - 2*j) + j, r) = & + flux_src_rsy_vf_l(j, k, r, advxb)* & + sign(1._wp, -1._wp*cbc_loc) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() end if ! END: Reshaping Outputted Data in y-direction @@ -1567,56 +1567,56 @@ contains ! Reshaping Outputted Data in z-direction else - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, flux_cbc_index - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_vf(i)%sf(r, k, dj*((p - 1) - 2*j) + j) = & - flux_rsz_vf_l(j, k, r, i)* & - sign(1._wp, -1._wp*cbc_loc) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) + do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size - flux_vf(momxe)%sf(r, k, dj*((p - 1) - 2*j) + j) = & - flux_rsz_vf_l(j, k, r, momxe) + flux_vf(i)%sf(r, k, dj*((p - 1) - 2*j) + j) = & + flux_rsz_vf_l(j, k, r, i)* & + sign(1._wp, -1._wp*cbc_loc) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_vf(momxe)%sf(r, k, dj*((p - 1) - 2*j) + j) = & + flux_rsz_vf_l(j, k, r, momxe) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = advxb, advxe - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_vf(i)%sf(r, k, dj*((p - 1) - 2*j) + j) = & - flux_src_rsz_vf_l(j, k, r, i) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - else - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) + do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size - flux_src_vf(advxb)%sf(r, k, dj*((p - 1) - 2*j) + j) = & - flux_src_rsz_vf_l(j, k, r, advxb)* & - sign(1._wp, -1._wp*cbc_loc) + flux_src_vf(i)%sf(r, k, dj*((p - 1) - 2*j) + j) = & + flux_src_rsz_vf_l(j, k, r, i) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() + else + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_vf(advxb)%sf(r, k, dj*((p - 1) - 2*j) + j) = & + flux_src_rsz_vf_l(j, k, r, advxb)* & + sign(1._wp, -1._wp*cbc_loc) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() end if end if diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 7ae10f4408..e333a86f82 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -279,24 +279,24 @@ contains integer :: j, k, l ! Computing Stability Criteria at Current Time-step - #:call GPU_PARALLEL_LOOP(collapse=3, private='[vel, alpha, Re]') - do l = 0, p - do k = 0, n - do j = 0, m - call s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,vel, alpha, Re]') + do l = 0, p + do k = 0, n + do j = 0, m + call s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) - call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c) + call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c) - if (viscous) then - call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf, vcfl_sf, Rc_sf) - else - call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf) - end if + if (viscous) then + call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf, vcfl_sf, Rc_sf) + else + call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf) + end if - end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() ! end: Computing Stability Criteria at Current Time-step diff --git a/src/simulation/m_derived_variables.fpp b/src/simulation/m_derived_variables.fpp index f4653a9366..c49929e928 100644 --- a/src/simulation/m_derived_variables.fpp +++ b/src/simulation/m_derived_variables.fpp @@ -146,24 +146,24 @@ contains z_accel) end if - #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - if (p > 0) then - accel_mag(i, j, k) = sqrt(x_accel(i, j, k)**2._wp + & - y_accel(i, j, k)**2._wp + & - z_accel(i, j, k)**2._wp) - elseif (n > 0) then - accel_mag(i, j, k) = sqrt(x_accel(i, j, k)**2._wp + & - y_accel(i, j, k)**2._wp) - else - accel_mag(i, j, k) = x_accel(i, j, k) - end if - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) + do k = 0, p + do j = 0, n + do i = 0, m + if (p > 0) then + accel_mag(i, j, k) = sqrt(x_accel(i, j, k)**2._wp + & + y_accel(i, j, k)**2._wp + & + z_accel(i, j, k)**2._wp) + elseif (n > 0) then + accel_mag(i, j, k) = sqrt(x_accel(i, j, k)**2._wp + & + y_accel(i, j, k)**2._wp) + else + accel_mag(i, j, k) = x_accel(i, j, k) + end if end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() $:GPU_UPDATE(host='[accel_mag]') @@ -204,35 +204,70 @@ contains ! Computing the acceleration component in the x-coordinate direction if (i == 1) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + q_sf(j, k, l) = (11._wp*q_prim_vf0(momxb)%sf(j, k, l) & + - 18._wp*q_prim_vf1(momxb)%sf(j, k, l) & + + 9._wp*q_prim_vf2(momxb)%sf(j, k, l) & + - 2._wp*q_prim_vf3(momxb)%sf(j, k, l))/(6._wp*dt) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + if (n == 0) then + $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) do l = 0, p do k = 0, n do j = 0, m - q_sf(j, k, l) = (11._wp*q_prim_vf0(momxb)%sf(j, k, l) & - - 18._wp*q_prim_vf1(momxb)%sf(j, k, l) & - + 9._wp*q_prim_vf2(momxb)%sf(j, k, l) & - - 2._wp*q_prim_vf3(momxb)%sf(j, k, l))/(6._wp*dt) + do r = -fd_number, fd_number + q_sf(j, k, l) = q_sf(j, k, l) & + + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & + q_prim_vf0(momxb)%sf(r + j, k, l) + end do end do end do end do - #:endcall GPU_PARALLEL_LOOP - - if (n == 0) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:END_GPU_PARALLEL_LOOP() + elseif (p == 0) then + $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) + do l = 0, p + do k = 0, n + do j = 0, m + do r = -fd_number, fd_number + q_sf(j, k, l) = q_sf(j, k, l) & + + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & + q_prim_vf0(momxb)%sf(r + j, k, l) & + + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & + q_prim_vf0(momxb)%sf(j, r + k, l) + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + else + if (grid_geometry == 3) then + $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) do l = 0, p do k = 0, n do j = 0, m do r = -fd_number, fd_number q_sf(j, k, l) = q_sf(j, k, l) & + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & - q_prim_vf0(momxb)%sf(r + j, k, l) + q_prim_vf0(momxb)%sf(r + j, k, l) & + + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & + q_prim_vf0(momxb)%sf(j, r + k, l) & + + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & + q_prim_vf0(momxb)%sf(j, k, r + l)/y_cc(k) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP - elseif (p == 0) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:END_GPU_PARALLEL_LOOP() + else + $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) do l = 0, p do k = 0, n do j = 0, m @@ -241,68 +276,50 @@ contains + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & q_prim_vf0(momxb)%sf(r + j, k, l) & + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & - q_prim_vf0(momxb)%sf(j, r + k, l) + q_prim_vf0(momxb)%sf(j, r + k, l) & + + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & + q_prim_vf0(momxb)%sf(j, k, r + l) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP - else - if (grid_geometry == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do l = 0, p - do k = 0, n - do j = 0, m - do r = -fd_number, fd_number - q_sf(j, k, l) = q_sf(j, k, l) & - + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & - q_prim_vf0(momxb)%sf(r + j, k, l) & - + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & - q_prim_vf0(momxb)%sf(j, r + k, l) & - + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & - q_prim_vf0(momxb)%sf(j, k, r + l)/y_cc(k) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - else - #:call GPU_PARALLEL_LOOP(collapse=4) - do l = 0, p - do k = 0, n - do j = 0, m - do r = -fd_number, fd_number - q_sf(j, k, l) = q_sf(j, k, l) & - + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & - q_prim_vf0(momxb)%sf(r + j, k, l) & - + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & - q_prim_vf0(momxb)%sf(j, r + k, l) & - + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & - q_prim_vf0(momxb)%sf(j, k, r + l) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if ! Computing the acceleration component in the y-coordinate direction elseif (i == 2) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + q_sf(j, k, l) = (11._wp*q_prim_vf0(momxb + 1)%sf(j, k, l) & + - 18._wp*q_prim_vf1(momxb + 1)%sf(j, k, l) & + + 9._wp*q_prim_vf2(momxb + 1)%sf(j, k, l) & + - 2._wp*q_prim_vf3(momxb + 1)%sf(j, k, l))/(6._wp*dt) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + if (p == 0) then + $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) do l = 0, p do k = 0, n do j = 0, m - q_sf(j, k, l) = (11._wp*q_prim_vf0(momxb + 1)%sf(j, k, l) & - - 18._wp*q_prim_vf1(momxb + 1)%sf(j, k, l) & - + 9._wp*q_prim_vf2(momxb + 1)%sf(j, k, l) & - - 2._wp*q_prim_vf3(momxb + 1)%sf(j, k, l))/(6._wp*dt) + do r = -fd_number, fd_number + q_sf(j, k, l) = q_sf(j, k, l) & + + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & + q_prim_vf0(momxb + 1)%sf(r + j, k, l) & + + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & + q_prim_vf0(momxb + 1)%sf(j, r + k, l) + end do end do end do end do - #:endcall GPU_PARALLEL_LOOP - - if (p == 0) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:END_GPU_PARALLEL_LOOP() + else + if (grid_geometry == 3) then + $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) do l = 0, p do k = 0, n do j = 0, m @@ -311,105 +328,88 @@ contains + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & q_prim_vf0(momxb + 1)%sf(r + j, k, l) & + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & - q_prim_vf0(momxb + 1)%sf(j, r + k, l) + q_prim_vf0(momxb + 1)%sf(j, r + k, l) & + + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & + q_prim_vf0(momxb + 1)%sf(j, k, r + l)/y_cc(k) & + - (q_prim_vf0(momxe)%sf(j, k, l)**2._wp)/y_cc(k) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP - else - if (grid_geometry == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do l = 0, p - do k = 0, n - do j = 0, m - do r = -fd_number, fd_number - q_sf(j, k, l) = q_sf(j, k, l) & - + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & - q_prim_vf0(momxb + 1)%sf(r + j, k, l) & - + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & - q_prim_vf0(momxb + 1)%sf(j, r + k, l) & - + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & - q_prim_vf0(momxb + 1)%sf(j, k, r + l)/y_cc(k) & - - (q_prim_vf0(momxe)%sf(j, k, l)**2._wp)/y_cc(k) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else - #:call GPU_PARALLEL_LOOP(collapse=4) - do l = 0, p - do k = 0, n - do j = 0, m - do r = -fd_number, fd_number - q_sf(j, k, l) = q_sf(j, k, l) & - + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & - q_prim_vf0(momxb + 1)%sf(r + j, k, l) & - + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & - q_prim_vf0(momxb + 1)%sf(j, r + k, l) & - + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & - q_prim_vf0(momxb + 1)%sf(j, k, r + l) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) + do l = 0, p + do k = 0, n + do j = 0, m + do r = -fd_number, fd_number + q_sf(j, k, l) = q_sf(j, k, l) & + + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & + q_prim_vf0(momxb + 1)%sf(r + j, k, l) & + + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & + q_prim_vf0(momxb + 1)%sf(j, r + k, l) & + + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & + q_prim_vf0(momxb + 1)%sf(j, k, r + l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end if ! Computing the acceleration component in the z-coordinate direction else - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - q_sf(j, k, l) = (11._wp*q_prim_vf0(momxe)%sf(j, k, l) & - - 18._wp*q_prim_vf1(momxe)%sf(j, k, l) & - + 9._wp*q_prim_vf2(momxe)%sf(j, k, l) & - - 2._wp*q_prim_vf3(momxe)%sf(j, k, l))/(6._wp*dt) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + q_sf(j, k, l) = (11._wp*q_prim_vf0(momxe)%sf(j, k, l) & + - 18._wp*q_prim_vf1(momxe)%sf(j, k, l) & + + 9._wp*q_prim_vf2(momxe)%sf(j, k, l) & + - 2._wp*q_prim_vf3(momxe)%sf(j, k, l))/(6._wp*dt) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (grid_geometry == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do l = 0, p - do k = 0, n - do j = 0, m - do r = -fd_number, fd_number - q_sf(j, k, l) = q_sf(j, k, l) & - + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & - q_prim_vf0(momxe)%sf(r + j, k, l) & - + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & - q_prim_vf0(momxe)%sf(j, r + k, l) & - + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & - q_prim_vf0(momxe)%sf(j, k, r + l)/y_cc(k) & - + (q_prim_vf0(momxe)%sf(j, k, l)* & - q_prim_vf0(momxb + 1)%sf(j, k, l))/y_cc(k) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) + do l = 0, p + do k = 0, n + do j = 0, m + do r = -fd_number, fd_number + q_sf(j, k, l) = q_sf(j, k, l) & + + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & + q_prim_vf0(momxe)%sf(r + j, k, l) & + + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & + q_prim_vf0(momxe)%sf(j, r + k, l) & + + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & + q_prim_vf0(momxe)%sf(j, k, r + l)/y_cc(k) & + + (q_prim_vf0(momxe)%sf(j, k, l)* & + q_prim_vf0(momxb + 1)%sf(j, k, l))/y_cc(k) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() else - #:call GPU_PARALLEL_LOOP(collapse=4) - do l = 0, p - do k = 0, n - do j = 0, m - do r = -fd_number, fd_number - q_sf(j, k, l) = q_sf(j, k, l) & - + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & - q_prim_vf0(momxe)%sf(r + j, k, l) & - + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & - q_prim_vf0(momxe)%sf(j, r + k, l) & - + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & - q_prim_vf0(momxe)%sf(j, k, r + l) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) + do l = 0, p + do k = 0, n + do j = 0, m + do r = -fd_number, fd_number + q_sf(j, k, l) = q_sf(j, k, l) & + + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & + q_prim_vf0(momxe)%sf(r + j, k, l) & + + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & + q_prim_vf0(momxe)%sf(j, r + k, l) & + + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & + q_prim_vf0(momxe)%sf(j, k, r + l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end if @@ -438,81 +438,81 @@ contains end do if (n == 0) then !1D simulation - #:call GPU_PARALLEL_LOOP(collapse=3,private='[dV]') - do l = 0, p !Loop over grid - do k = 0, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids !Loop over individual fluids - dV = dx(j) - ! Mass - $:GPU_ATOMIC(atomic='update') - c_m(i, 1) = c_m(i, 1) + q_vf(i)%sf(j, k, l)*dV - ! x-location weighted - $:GPU_ATOMIC(atomic='update') - c_m(i, 2) = c_m(i, 2) + q_vf(i)%sf(j, k, l)*dV*x_cc(j) - ! Volume fraction - $:GPU_ATOMIC(atomic='update') - c_m(i, 5) = c_m(i, 5) + q_vf(i + advxb - 1)%sf(j, k, l)*dV - end do + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,dV]') + do l = 0, p !Loop over grid + do k = 0, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids !Loop over individual fluids + dV = dx(j) + ! Mass + $:GPU_ATOMIC(atomic='update') + c_m(i, 1) = c_m(i, 1) + q_vf(i)%sf(j, k, l)*dV + ! x-location weighted + $:GPU_ATOMIC(atomic='update') + c_m(i, 2) = c_m(i, 2) + q_vf(i)%sf(j, k, l)*dV*x_cc(j) + ! Volume fraction + $:GPU_ATOMIC(atomic='update') + c_m(i, 5) = c_m(i, 5) + q_vf(i + advxb - 1)%sf(j, k, l)*dV end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() elseif (p == 0) then !2D simulation - #:call GPU_PARALLEL_LOOP(collapse=3,private='[dV]') - do l = 0, p !Loop over grid - do k = 0, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids !Loop over individual fluids - dV = dx(j)*dy(k) - ! Mass - $:GPU_ATOMIC(atomic='update') - c_m(i, 1) = c_m(i, 1) + q_vf(i)%sf(j, k, l)*dV - ! x-location weighted - $:GPU_ATOMIC(atomic='update') - c_m(i, 2) = c_m(i, 2) + q_vf(i)%sf(j, k, l)*dV*x_cc(j) - ! y-location weighted - $:GPU_ATOMIC(atomic='update') - c_m(i, 3) = c_m(i, 3) + q_vf(i)%sf(j, k, l)*dV*y_cc(k) - ! Volume fraction - $:GPU_ATOMIC(atomic='update') - c_m(i, 5) = c_m(i, 5) + q_vf(i + advxb - 1)%sf(j, k, l)*dV - end do + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,dV]') + do l = 0, p !Loop over grid + do k = 0, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids !Loop over individual fluids + dV = dx(j)*dy(k) + ! Mass + $:GPU_ATOMIC(atomic='update') + c_m(i, 1) = c_m(i, 1) + q_vf(i)%sf(j, k, l)*dV + ! x-location weighted + $:GPU_ATOMIC(atomic='update') + c_m(i, 2) = c_m(i, 2) + q_vf(i)%sf(j, k, l)*dV*x_cc(j) + ! y-location weighted + $:GPU_ATOMIC(atomic='update') + c_m(i, 3) = c_m(i, 3) + q_vf(i)%sf(j, k, l)*dV*y_cc(k) + ! Volume fraction + $:GPU_ATOMIC(atomic='update') + c_m(i, 5) = c_m(i, 5) + q_vf(i + advxb - 1)%sf(j, k, l)*dV end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() else !3D simulation - #:call GPU_PARALLEL_LOOP(collapse=3,private='[dV]') - do l = 0, p !Loop over grid - do k = 0, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids !Loop over individual fluids - - dV = dx(j)*dy(k)*dz(l) - ! Mass - $:GPU_ATOMIC(atomic='update') - c_m(i, 1) = c_m(i, 1) + q_vf(i)%sf(j, k, l)*dV - ! x-location weighted - $:GPU_ATOMIC(atomic='update') - c_m(i, 2) = c_m(i, 2) + q_vf(i)%sf(j, k, l)*dV*x_cc(j) - ! y-location weighted - $:GPU_ATOMIC(atomic='update') - c_m(i, 3) = c_m(i, 3) + q_vf(i)%sf(j, k, l)*dV*y_cc(k) - ! z-location weighted - $:GPU_ATOMIC(atomic='update') - c_m(i, 4) = c_m(i, 4) + q_vf(i)%sf(j, k, l)*dV*z_cc(l) - ! Volume fraction - $:GPU_ATOMIC(atomic='update') - c_m(i, 5) = c_m(i, 5) + q_vf(i + advxb - 1)%sf(j, k, l)*dV - end do + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,dV]') + do l = 0, p !Loop over grid + do k = 0, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids !Loop over individual fluids + + dV = dx(j)*dy(k)*dz(l) + ! Mass + $:GPU_ATOMIC(atomic='update') + c_m(i, 1) = c_m(i, 1) + q_vf(i)%sf(j, k, l)*dV + ! x-location weighted + $:GPU_ATOMIC(atomic='update') + c_m(i, 2) = c_m(i, 2) + q_vf(i)%sf(j, k, l)*dV*x_cc(j) + ! y-location weighted + $:GPU_ATOMIC(atomic='update') + c_m(i, 3) = c_m(i, 3) + q_vf(i)%sf(j, k, l)*dV*y_cc(k) + ! z-location weighted + $:GPU_ATOMIC(atomic='update') + c_m(i, 4) = c_m(i, 4) + q_vf(i)%sf(j, k, l)*dV*z_cc(l) + ! Volume fraction + $:GPU_ATOMIC(atomic='update') + c_m(i, 5) = c_m(i, 5) + q_vf(i + advxb - 1)%sf(j, k, l)*dV end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if $:GPU_UPDATE(host='[c_m]') diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index 39c8bd493e..f381e800d4 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -141,25 +141,25 @@ contains if (bc_y%beg >= 0) return #if defined(MFC_GPU) - #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, cmplx_size - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 1, cmplx_size + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, 0, l) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, 0, l) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() #:if not USING_NVHPC p_real => data_real_gpu @@ -179,15 +179,15 @@ contains Nfq = 3 $:GPU_UPDATE(device='[Nfq]') - #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, Nfq - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 1, Nfq + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() #:call GPU_HOST_DATA(use_device_ptr='[p_real, p_fltr_cmplx]') #if defined(__PGI) @@ -198,38 +198,38 @@ contains #endif #:endcall GPU_HOST_DATA - #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) - q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) + q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() do i = 1, fourier_rings - #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, cmplx_size - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 1, cmplx_size + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) end do end do - #:endcall GPU_PARALLEL_LOOP - - #:call GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l) - end do + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3, firstprivate='[i]') + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() #:call GPU_HOST_DATA(use_device_ptr='[p_real, p_cmplx]') #if defined(__PGI) @@ -243,15 +243,15 @@ contains Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) $:GPU_UPDATE(device='[Nfq]') - #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, Nfq - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 1, Nfq + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() #:call GPU_HOST_DATA(use_device_ptr='[p_real, p_fltr_cmplx]') #if defined(__PGI) @@ -262,16 +262,16 @@ contains #endif #:endcall GPU_HOST_DATA - #:call GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) - q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3, firstprivate='[i]') + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) + q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end do #:endcall GPU_DATA diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index 2b171016cb..d6646958bd 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -106,108 +106,108 @@ contains real(wp) :: G_local integer :: j, k, l, i, r - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_K, alpha_rho_K, rho, gamma, pi_inf, qv, G_local, Re, tensora, tensorb]') - do l = 0, p - do k = 0, n - do j = 0, m + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,alpha_K, alpha_rho_K, rho, gamma, pi_inf, qv, G_local, Re, tensora, tensorb]') + do l = 0, p + do k = 0, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_k(i) = q_cons_vf(i)%sf(j, k, l) + alpha_k(i) = q_cons_vf(advxb + i - 1)%sf(j, k, l) + end do + ! If in simulation, use acc mixture subroutines + call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, alpha_k, & + alpha_rho_k, Re, G_local, Gs_hyper) + rho = max(rho, sgm_eps) + G_local = max(G_local, sgm_eps) + !if ( G_local <= verysmall ) G_K = 0._wp + + if (G_local > verysmall) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, tensor_size + tensora(i) = 0._wp + end do + ! STEP 1: computing the grad_xi tensor using finite differences + ! grad_xi definition / organization + ! number for the tensor 1-3: dxix_dx, dxiy_dx, dxiz_dx + ! 4-6 : dxix_dy, dxiy_dy, dxiz_dy + ! 7-9 : dxix_dz, dxiy_dz, dxiz_dz $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_k(i) = q_cons_vf(i)%sf(j, k, l) - alpha_k(i) = q_cons_vf(advxb + i - 1)%sf(j, k, l) + do r = -fd_number, fd_number + ! derivatives in the x-direction + tensora(1) = tensora(1) + q_prim_vf(xibeg)%sf(j + r, k, l)*fd_coeff_x_hyper(r, j) + tensora(2) = tensora(2) + q_prim_vf(xibeg + 1)%sf(j + r, k, l)*fd_coeff_x_hyper(r, j) + tensora(3) = tensora(3) + q_prim_vf(xiend)%sf(j + r, k, l)*fd_coeff_x_hyper(r, j) + ! derivatives in the y-direction + tensora(4) = tensora(4) + q_prim_vf(xibeg)%sf(j, k + r, l)*fd_coeff_y_hyper(r, k) + tensora(5) = tensora(5) + q_prim_vf(xibeg + 1)%sf(j, k + r, l)*fd_coeff_y_hyper(r, k) + tensora(6) = tensora(6) + q_prim_vf(xiend)%sf(j, k + r, l)*fd_coeff_y_hyper(r, k) + ! derivatives in the z-direction + tensora(7) = tensora(7) + q_prim_vf(xibeg)%sf(j, k, l + r)*fd_coeff_z_hyper(r, l) + tensora(8) = tensora(8) + q_prim_vf(xibeg + 1)%sf(j, k, l + r)*fd_coeff_z_hyper(r, l) + tensora(9) = tensora(9) + q_prim_vf(xiend)%sf(j, k, l + r)*fd_coeff_z_hyper(r, l) end do - ! If in simulation, use acc mixture subroutines - call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, alpha_k, & - alpha_rho_k, Re, G_local, Gs_hyper) - rho = max(rho, sgm_eps) - G_local = max(G_local, sgm_eps) - !if ( G_local <= verysmall ) G_K = 0._wp - - if (G_local > verysmall) then + ! STEP 2a: computing the adjoint of the grad_xi tensor for the inverse + tensorb(1) = tensora(5)*tensora(9) - tensora(6)*tensora(8) + tensorb(2) = -(tensora(2)*tensora(9) - tensora(3)*tensora(8)) + tensorb(3) = tensora(2)*tensora(6) - tensora(3)*tensora(5) + tensorb(4) = -(tensora(4)*tensora(9) - tensora(6)*tensora(7)) + tensorb(5) = tensora(1)*tensora(9) - tensora(3)*tensora(7) + tensorb(6) = -(tensora(1)*tensora(6) - tensora(4)*tensora(3)) + tensorb(7) = tensora(4)*tensora(8) - tensora(5)*tensora(7) + tensorb(8) = -(tensora(1)*tensora(8) - tensora(2)*tensora(7)) + tensorb(9) = tensora(1)*tensora(5) - tensora(2)*tensora(4) + + ! STEP 2b: computing the determinant of the grad_xi tensor + tensorb(tensor_size) = tensora(1)*(tensora(5)*tensora(9) - tensora(6)*tensora(8)) & + - tensora(2)*(tensora(4)*tensora(9) - tensora(6)*tensora(7)) & + + tensora(3)*(tensora(4)*tensora(8) - tensora(5)*tensora(7)) + + if (tensorb(tensor_size) > verysmall) then + ! STEP 2c: computing the inverse of grad_xi tensor = F + ! tensorb is the adjoint, tensora becomes F $:GPU_LOOP(parallelism='[seq]') - do i = 1, tensor_size - tensora(i) = 0._wp + do i = 1, tensor_size - 1 + tensora(i) = tensorb(i)/tensorb(tensor_size) end do - ! STEP 1: computing the grad_xi tensor using finite differences - ! grad_xi definition / organization - ! number for the tensor 1-3: dxix_dx, dxiy_dx, dxiz_dx - ! 4-6 : dxix_dy, dxiy_dy, dxiz_dy - ! 7-9 : dxix_dz, dxiy_dz, dxiz_dz + + ! STEP 2d: computing the J = det(F) = 1/det(\grad{\xi}) + tensorb(tensor_size) = 1._wp/tensorb(tensor_size) + + ! STEP 3: computing F transpose F + tensorb(1) = tensora(1)**2 + tensora(2)**2 + tensora(3)**2 + tensorb(5) = tensora(4)**2 + tensora(5)**2 + tensora(6)**2 + tensorb(9) = tensora(7)**2 + tensora(8)**2 + tensora(9)**2 + tensorb(2) = tensora(1)*tensora(4) + tensora(2)*tensora(5) + tensora(3)*tensora(6) + tensorb(3) = tensora(1)*tensora(7) + tensora(2)*tensora(8) + tensora(3)*tensora(9) + tensorb(6) = tensora(4)*tensora(7) + tensora(5)*tensora(8) + tensora(6)*tensora(9) + ! STEP 4: update the btensor, this is consistent with Riemann solvers + #:for BIJ, TXY in [(1,1),(2,2),(3,5),(4,3),(5,6),(6,9)] + btensor%vf(${BIJ}$)%sf(j, k, l) = tensorb(${TXY}$) + #:endfor + ! store the determinant at the last entry of the btensor + btensor%vf(b_size)%sf(j, k, l) = tensorb(tensor_size) + ! STEP 5a: updating the Cauchy stress primitive scalar field + if (hyper_model == 1) then + call s_neoHookean_cauchy_solver(btensor%vf, q_prim_vf, G_local, j, k, l) + elseif (hyper_model == 2) then + call s_Mooney_Rivlin_cauchy_solver(btensor%vf, q_prim_vf, G_local, j, k, l) + end if + ! STEP 5b: updating the pressure field + q_prim_vf(E_idx)%sf(j, k, l) = q_prim_vf(E_idx)%sf(j, k, l) - & + G_local*q_prim_vf(xiend + 1)%sf(j, k, l)/gamma + ! STEP 5c: updating the Cauchy stress conservative scalar field $:GPU_LOOP(parallelism='[seq]') - do r = -fd_number, fd_number - ! derivatives in the x-direction - tensora(1) = tensora(1) + q_prim_vf(xibeg)%sf(j + r, k, l)*fd_coeff_x_hyper(r, j) - tensora(2) = tensora(2) + q_prim_vf(xibeg + 1)%sf(j + r, k, l)*fd_coeff_x_hyper(r, j) - tensora(3) = tensora(3) + q_prim_vf(xiend)%sf(j + r, k, l)*fd_coeff_x_hyper(r, j) - ! derivatives in the y-direction - tensora(4) = tensora(4) + q_prim_vf(xibeg)%sf(j, k + r, l)*fd_coeff_y_hyper(r, k) - tensora(5) = tensora(5) + q_prim_vf(xibeg + 1)%sf(j, k + r, l)*fd_coeff_y_hyper(r, k) - tensora(6) = tensora(6) + q_prim_vf(xiend)%sf(j, k + r, l)*fd_coeff_y_hyper(r, k) - ! derivatives in the z-direction - tensora(7) = tensora(7) + q_prim_vf(xibeg)%sf(j, k, l + r)*fd_coeff_z_hyper(r, l) - tensora(8) = tensora(8) + q_prim_vf(xibeg + 1)%sf(j, k, l + r)*fd_coeff_z_hyper(r, l) - tensora(9) = tensora(9) + q_prim_vf(xiend)%sf(j, k, l + r)*fd_coeff_z_hyper(r, l) + do i = 1, b_size - 1 + q_cons_vf(strxb + i - 1)%sf(j, k, l) = & + rho*q_prim_vf(strxb + i - 1)%sf(j, k, l) end do - ! STEP 2a: computing the adjoint of the grad_xi tensor for the inverse - tensorb(1) = tensora(5)*tensora(9) - tensora(6)*tensora(8) - tensorb(2) = -(tensora(2)*tensora(9) - tensora(3)*tensora(8)) - tensorb(3) = tensora(2)*tensora(6) - tensora(3)*tensora(5) - tensorb(4) = -(tensora(4)*tensora(9) - tensora(6)*tensora(7)) - tensorb(5) = tensora(1)*tensora(9) - tensora(3)*tensora(7) - tensorb(6) = -(tensora(1)*tensora(6) - tensora(4)*tensora(3)) - tensorb(7) = tensora(4)*tensora(8) - tensora(5)*tensora(7) - tensorb(8) = -(tensora(1)*tensora(8) - tensora(2)*tensora(7)) - tensorb(9) = tensora(1)*tensora(5) - tensora(2)*tensora(4) - - ! STEP 2b: computing the determinant of the grad_xi tensor - tensorb(tensor_size) = tensora(1)*(tensora(5)*tensora(9) - tensora(6)*tensora(8)) & - - tensora(2)*(tensora(4)*tensora(9) - tensora(6)*tensora(7)) & - + tensora(3)*(tensora(4)*tensora(8) - tensora(5)*tensora(7)) - - if (tensorb(tensor_size) > verysmall) then - ! STEP 2c: computing the inverse of grad_xi tensor = F - ! tensorb is the adjoint, tensora becomes F - $:GPU_LOOP(parallelism='[seq]') - do i = 1, tensor_size - 1 - tensora(i) = tensorb(i)/tensorb(tensor_size) - end do - - ! STEP 2d: computing the J = det(F) = 1/det(\grad{\xi}) - tensorb(tensor_size) = 1._wp/tensorb(tensor_size) - - ! STEP 3: computing F transpose F - tensorb(1) = tensora(1)**2 + tensora(2)**2 + tensora(3)**2 - tensorb(5) = tensora(4)**2 + tensora(5)**2 + tensora(6)**2 - tensorb(9) = tensora(7)**2 + tensora(8)**2 + tensora(9)**2 - tensorb(2) = tensora(1)*tensora(4) + tensora(2)*tensora(5) + tensora(3)*tensora(6) - tensorb(3) = tensora(1)*tensora(7) + tensora(2)*tensora(8) + tensora(3)*tensora(9) - tensorb(6) = tensora(4)*tensora(7) + tensora(5)*tensora(8) + tensora(6)*tensora(9) - ! STEP 4: update the btensor, this is consistent with Riemann solvers - #:for BIJ, TXY in [(1,1),(2,2),(3,5),(4,3),(5,6),(6,9)] - btensor%vf(${BIJ}$)%sf(j, k, l) = tensorb(${TXY}$) - #:endfor - ! store the determinant at the last entry of the btensor - btensor%vf(b_size)%sf(j, k, l) = tensorb(tensor_size) - ! STEP 5a: updating the Cauchy stress primitive scalar field - if (hyper_model == 1) then - call s_neoHookean_cauchy_solver(btensor%vf, q_prim_vf, G_local, j, k, l) - elseif (hyper_model == 2) then - call s_Mooney_Rivlin_cauchy_solver(btensor%vf, q_prim_vf, G_local, j, k, l) - end if - ! STEP 5b: updating the pressure field - q_prim_vf(E_idx)%sf(j, k, l) = q_prim_vf(E_idx)%sf(j, k, l) - & - G_local*q_prim_vf(xiend + 1)%sf(j, k, l)/gamma - ! STEP 5c: updating the Cauchy stress conservative scalar field - $:GPU_LOOP(parallelism='[seq]') - do i = 1, b_size - 1 - q_cons_vf(strxb + i - 1)%sf(j, k, l) = & - rho*q_prim_vf(strxb + i - 1)%sf(j, k, l) - end do - end if end if - end do + end if end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_hyperelastic_rmt_stress_update !> The following subroutine handles the calculation of the btensor. diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp index 312c2343b6..aed818fda9 100644 --- a/src/simulation/m_hypoelastic.fpp +++ b/src/simulation/m_hypoelastic.fpp @@ -104,272 +104,272 @@ contains ! calculate velocity gradients + rho_K and G_K ! TODO: re-organize these loops one by one for GPU efficiency if possible? - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) + do q = 0, p + do l = 0, n + do k = 0, m + du_dx_hypo(k, l, q) = 0._wp + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) + do q = 0, p + do l = 0, n + do k = 0, m + $:GPU_LOOP(parallelism='[seq]') + do r = -fd_number, fd_number + du_dx_hypo(k, l, q) = du_dx_hypo(k, l, q) & + + q_prim_vf(momxb)%sf(k + r, l, q)*fd_coeff_x_hypo(r, k) + end do + + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + if (ndirs > 1) then + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) do q = 0, p do l = 0, n do k = 0, m - du_dx_hypo(k, l, q) = 0._wp + du_dy_hypo(k, l, q) = 0._wp; dv_dx_hypo(k, l, q) = 0._wp; dv_dy_hypo(k, l, q) = 0._wp end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) do q = 0, p do l = 0, n do k = 0, m $:GPU_LOOP(parallelism='[seq]') do r = -fd_number, fd_number - du_dx_hypo(k, l, q) = du_dx_hypo(k, l, q) & - + q_prim_vf(momxb)%sf(k + r, l, q)*fd_coeff_x_hypo(r, k) + du_dy_hypo(k, l, q) = du_dy_hypo(k, l, q) & + + q_prim_vf(momxb)%sf(k, l + r, q)*fd_coeff_y_hypo(r, l) + dv_dx_hypo(k, l, q) = dv_dx_hypo(k, l, q) & + + q_prim_vf(momxb + 1)%sf(k + r, l, q)*fd_coeff_x_hypo(r, k) + dv_dy_hypo(k, l, q) = dv_dy_hypo(k, l, q) & + + q_prim_vf(momxb + 1)%sf(k, l + r, q)*fd_coeff_y_hypo(r, l) end do - end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() - if (ndirs > 1) then - #:call GPU_PARALLEL_LOOP(collapse=3) + ! 3D + if (ndirs == 3) then + + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) do q = 0, p do l = 0, n do k = 0, m - du_dy_hypo(k, l, q) = 0._wp; dv_dx_hypo(k, l, q) = 0._wp; dv_dy_hypo(k, l, q) = 0._wp + du_dz_hypo(k, l, q) = 0._wp; dv_dz_hypo(k, l, q) = 0._wp; dw_dx_hypo(k, l, q) = 0._wp; + dw_dy_hypo(k, l, q) = 0._wp; dw_dz_hypo(k, l, q) = 0._wp; end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) do q = 0, p do l = 0, n do k = 0, m $:GPU_LOOP(parallelism='[seq]') do r = -fd_number, fd_number - du_dy_hypo(k, l, q) = du_dy_hypo(k, l, q) & - + q_prim_vf(momxb)%sf(k, l + r, q)*fd_coeff_y_hypo(r, l) - dv_dx_hypo(k, l, q) = dv_dx_hypo(k, l, q) & - + q_prim_vf(momxb + 1)%sf(k + r, l, q)*fd_coeff_x_hypo(r, k) - dv_dy_hypo(k, l, q) = dv_dy_hypo(k, l, q) & - + q_prim_vf(momxb + 1)%sf(k, l + r, q)*fd_coeff_y_hypo(r, l) + du_dz_hypo(k, l, q) = du_dz_hypo(k, l, q) & + + q_prim_vf(momxb)%sf(k, l, q + r)*fd_coeff_z_hypo(r, q) + dv_dz_hypo(k, l, q) = dv_dz_hypo(k, l, q) & + + q_prim_vf(momxb + 1)%sf(k, l, q + r)*fd_coeff_z_hypo(r, q) + dw_dx_hypo(k, l, q) = dw_dx_hypo(k, l, q) & + + q_prim_vf(momxe)%sf(k + r, l, q)*fd_coeff_x_hypo(r, k) + dw_dy_hypo(k, l, q) = dw_dy_hypo(k, l, q) & + + q_prim_vf(momxe)%sf(k, l + r, q)*fd_coeff_y_hypo(r, l) + dw_dz_hypo(k, l, q) = dw_dz_hypo(k, l, q) & + + q_prim_vf(momxe)%sf(k, l, q + r)*fd_coeff_z_hypo(r, q) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP - - ! 3D - if (ndirs == 3) then - - #:call GPU_PARALLEL_LOOP(collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - du_dz_hypo(k, l, q) = 0._wp; dv_dz_hypo(k, l, q) = 0._wp; dw_dx_hypo(k, l, q) = 0._wp; - dw_dy_hypo(k, l, q) = 0._wp; dw_dz_hypo(k, l, q) = 0._wp; - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - - #:call GPU_PARALLEL_LOOP(collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - $:GPU_LOOP(parallelism='[seq]') - do r = -fd_number, fd_number - du_dz_hypo(k, l, q) = du_dz_hypo(k, l, q) & - + q_prim_vf(momxb)%sf(k, l, q + r)*fd_coeff_z_hypo(r, q) - dv_dz_hypo(k, l, q) = dv_dz_hypo(k, l, q) & - + q_prim_vf(momxb + 1)%sf(k, l, q + r)*fd_coeff_z_hypo(r, q) - dw_dx_hypo(k, l, q) = dw_dx_hypo(k, l, q) & - + q_prim_vf(momxe)%sf(k + r, l, q)*fd_coeff_x_hypo(r, k) - dw_dy_hypo(k, l, q) = dw_dy_hypo(k, l, q) & - + q_prim_vf(momxe)%sf(k, l + r, q)*fd_coeff_y_hypo(r, l) - dw_dz_hypo(k, l, q) = dw_dz_hypo(k, l, q) & - + q_prim_vf(momxe)%sf(k, l, q + r)*fd_coeff_z_hypo(r, q) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if - #:call GPU_PARALLEL_LOOP(collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - rho_K = 0._wp; G_K = 0._wp - do i = 1, num_fluids - rho_K = rho_K + q_prim_vf(i)%sf(k, l, q) !alpha_rho_K(1) - G_K = G_K + q_prim_vf(advxb - 1 + i)%sf(k, l, q)*Gs_hypo(i) !alpha_K(1) * Gs_hypo(1) - end do + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) + do q = 0, p + do l = 0, n + do k = 0, m + rho_K = 0._wp; G_K = 0._wp + do i = 1, num_fluids + rho_K = rho_K + q_prim_vf(i)%sf(k, l, q) !alpha_rho_K(1) + G_K = G_K + q_prim_vf(advxb - 1 + i)%sf(k, l, q)*Gs_hypo(i) !alpha_K(1) * Gs_hypo(1) + end do - if (cont_damage) G_K = G_K*max((1._wp - q_prim_vf(damage_idx)%sf(k, l, q)), 0._wp) + if (cont_damage) G_K = G_K*max((1._wp - q_prim_vf(damage_idx)%sf(k, l, q)), 0._wp) - rho_K_field(k, l, q) = rho_K - G_K_field(k, l, q) = G_K + rho_K_field(k, l, q) = rho_K + G_K_field(k, l, q) = G_K - !TODO: take this out if not needed - if (G_K < verysmall) then - G_K_field(k, l, q) = 0 - end if - end do + !TODO: take this out if not needed + if (G_K < verysmall) then + G_K_field(k, l, q) = 0 + end if end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() ! apply rhs source term to elastic stress equation - #:call GPU_PARALLEL_LOOP(collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - rhs_vf(strxb)%sf(k, l, q) = & - rhs_vf(strxb)%sf(k, l, q) + rho_K_field(k, l, q)* & - ((4._wp*G_K_field(k, l, q)/3._wp) + & - q_prim_vf(strxb)%sf(k, l, q))* & - du_dx_hypo(k, l, q) - end do + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) + do q = 0, p + do l = 0, n + do k = 0, m + rhs_vf(strxb)%sf(k, l, q) = & + rhs_vf(strxb)%sf(k, l, q) + rho_K_field(k, l, q)* & + ((4._wp*G_K_field(k, l, q)/3._wp) + & + q_prim_vf(strxb)%sf(k, l, q))* & + du_dx_hypo(k, l, q) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() elseif (idir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - rhs_vf(strxb)%sf(k, l, q) = rhs_vf(strxb)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 1)%sf(k, l, q)*du_dy_hypo(k, l, q) + & - q_prim_vf(strxb + 1)%sf(k, l, q)*du_dy_hypo(k, l, q) - & - q_prim_vf(strxb)%sf(k, l, q)*dv_dy_hypo(k, l, q) - & - 2._wp*G_K_field(k, l, q)*(1._wp/3._wp)*dv_dy_hypo(k, l, q)) - - rhs_vf(strxb + 1)%sf(k, l, q) = rhs_vf(strxb + 1)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 1)%sf(k, l, q)*du_dx_hypo(k, l, q) + & - q_prim_vf(strxb)%sf(k, l, q)*dv_dx_hypo(k, l, q) - & - q_prim_vf(strxb + 1)%sf(k, l, q)*du_dx_hypo(k, l, q) + & - q_prim_vf(strxb + 2)%sf(k, l, q)*du_dy_hypo(k, l, q) + & - q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dy_hypo(k, l, q) - & - q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & - 2._wp*G_K_field(k, l, q)*(1._wp/2._wp)*(du_dy_hypo(k, l, q) + & - dv_dx_hypo(k, l, q))) - - rhs_vf(strxb + 2)%sf(k, l, q) = rhs_vf(strxb + 2)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dx_hypo(k, l, q) + & - q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dx_hypo(k, l, q) - & - q_prim_vf(strxb + 2)%sf(k, l, q)*du_dx_hypo(k, l, q) + & - q_prim_vf(strxb + 2)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & - q_prim_vf(strxb + 2)%sf(k, l, q)*dv_dy_hypo(k, l, q) - & - q_prim_vf(strxb + 2)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & - 2._wp*G_K_field(k, l, q)*(dv_dy_hypo(k, l, q) - (1._wp/3._wp)* & - (du_dx_hypo(k, l, q) + & - dv_dy_hypo(k, l, q)))) - end do + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) + do q = 0, p + do l = 0, n + do k = 0, m + rhs_vf(strxb)%sf(k, l, q) = rhs_vf(strxb)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxb + 1)%sf(k, l, q)*du_dy_hypo(k, l, q) + & + q_prim_vf(strxb + 1)%sf(k, l, q)*du_dy_hypo(k, l, q) - & + q_prim_vf(strxb)%sf(k, l, q)*dv_dy_hypo(k, l, q) - & + 2._wp*G_K_field(k, l, q)*(1._wp/3._wp)*dv_dy_hypo(k, l, q)) + + rhs_vf(strxb + 1)%sf(k, l, q) = rhs_vf(strxb + 1)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxb + 1)%sf(k, l, q)*du_dx_hypo(k, l, q) + & + q_prim_vf(strxb)%sf(k, l, q)*dv_dx_hypo(k, l, q) - & + q_prim_vf(strxb + 1)%sf(k, l, q)*du_dx_hypo(k, l, q) + & + q_prim_vf(strxb + 2)%sf(k, l, q)*du_dy_hypo(k, l, q) + & + q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dy_hypo(k, l, q) - & + q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & + 2._wp*G_K_field(k, l, q)*(1._wp/2._wp)*(du_dy_hypo(k, l, q) + & + dv_dx_hypo(k, l, q))) + + rhs_vf(strxb + 2)%sf(k, l, q) = rhs_vf(strxb + 2)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dx_hypo(k, l, q) + & + q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dx_hypo(k, l, q) - & + q_prim_vf(strxb + 2)%sf(k, l, q)*du_dx_hypo(k, l, q) + & + q_prim_vf(strxb + 2)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & + q_prim_vf(strxb + 2)%sf(k, l, q)*dv_dy_hypo(k, l, q) - & + q_prim_vf(strxb + 2)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & + 2._wp*G_K_field(k, l, q)*(dv_dy_hypo(k, l, q) - (1._wp/3._wp)* & + (du_dx_hypo(k, l, q) + & + dv_dy_hypo(k, l, q)))) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() elseif (idir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - rhs_vf(strxb)%sf(k, l, q) = rhs_vf(strxb)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 3)%sf(k, l, q)*du_dz_hypo(k, l, q) + & - q_prim_vf(strxb + 3)%sf(k, l, q)*du_dz_hypo(k, l, q) - & - q_prim_vf(strxb)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) + do q = 0, p + do l = 0, n + do k = 0, m + rhs_vf(strxb)%sf(k, l, q) = rhs_vf(strxb)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxb + 3)%sf(k, l, q)*du_dz_hypo(k, l, q) + & + q_prim_vf(strxb + 3)%sf(k, l, q)*du_dz_hypo(k, l, q) - & + q_prim_vf(strxb)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & + 2._wp*G_K_field(k, l, q)*(1._wp/3._wp)*dw_dz_hypo(k, l, q)) + + rhs_vf(strxb + 1)%sf(k, l, q) = rhs_vf(strxb + 1)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxb + 4)%sf(k, l, q)*du_dz_hypo(k, l, q) + & + q_prim_vf(strxb + 3)%sf(k, l, q)*dv_dz_hypo(k, l, q) - & + q_prim_vf(strxb + 1)%sf(k, l, q)*dw_dz_hypo(k, l, q)) + + rhs_vf(strxb + 2)%sf(k, l, q) = rhs_vf(strxb + 2)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dz_hypo(k, l, q) + & + q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dz_hypo(k, l, q) - & + q_prim_vf(strxb + 2)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & 2._wp*G_K_field(k, l, q)*(1._wp/3._wp)*dw_dz_hypo(k, l, q)) - rhs_vf(strxb + 1)%sf(k, l, q) = rhs_vf(strxb + 1)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 4)%sf(k, l, q)*du_dz_hypo(k, l, q) + & - q_prim_vf(strxb + 3)%sf(k, l, q)*dv_dz_hypo(k, l, q) - & - q_prim_vf(strxb + 1)%sf(k, l, q)*dw_dz_hypo(k, l, q)) - - rhs_vf(strxb + 2)%sf(k, l, q) = rhs_vf(strxb + 2)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dz_hypo(k, l, q) + & - q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dz_hypo(k, l, q) - & - q_prim_vf(strxb + 2)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & - 2._wp*G_K_field(k, l, q)*(1._wp/3._wp)*dw_dz_hypo(k, l, q)) - - rhs_vf(strxb + 3)%sf(k, l, q) = rhs_vf(strxb + 3)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 3)%sf(k, l, q)*du_dx_hypo(k, l, q) + & - q_prim_vf(strxb)%sf(k, l, q)*dw_dx_hypo(k, l, q) - & - q_prim_vf(strxb + 3)%sf(k, l, q)*du_dx_hypo(k, l, q) + & - q_prim_vf(strxb + 4)%sf(k, l, q)*du_dy_hypo(k, l, q) + & - q_prim_vf(strxb + 1)%sf(k, l, q)*dw_dy_hypo(k, l, q) - & - q_prim_vf(strxb + 3)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & - q_prim_vf(strxb + 5)%sf(k, l, q)*du_dz_hypo(k, l, q) + & - q_prim_vf(strxb + 3)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & - q_prim_vf(strxb + 3)%sf(k, l, q)*dw_dz_hypo(k, l, q) + & - 2._wp*G_K_field(k, l, q)*(1._wp/2._wp)*(du_dz_hypo(k, l, q) + & - dw_dx_hypo(k, l, q))) - - rhs_vf(strxb + 4)%sf(k, l, q) = rhs_vf(strxb + 4)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 3)%sf(k, l, q)*dv_dx_hypo(k, l, q) + & - q_prim_vf(strxb + 1)%sf(k, l, q)*dw_dx_hypo(k, l, q) - & - q_prim_vf(strxb + 4)%sf(k, l, q)*du_dx_hypo(k, l, q) + & - q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & - q_prim_vf(strxb + 2)%sf(k, l, q)*dw_dy_hypo(k, l, q) - & - q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & - q_prim_vf(strxb + 5)%sf(k, l, q)*dv_dz_hypo(k, l, q) + & - q_prim_vf(strxb + 4)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & - q_prim_vf(strxb + 4)%sf(k, l, q)*dw_dz_hypo(k, l, q) + & - 2._wp*G_K_field(k, l, q)*(1._wp/2._wp)*(dv_dz_hypo(k, l, q) + & - dw_dy_hypo(k, l, q))) - - rhs_vf(strxe)%sf(k, l, q) = rhs_vf(strxe)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxe - 2)%sf(k, l, q)*dw_dx_hypo(k, l, q) + & - q_prim_vf(strxe - 2)%sf(k, l, q)*dw_dx_hypo(k, l, q) - & - q_prim_vf(strxe)%sf(k, l, q)*du_dx_hypo(k, l, q) + & - q_prim_vf(strxe - 1)%sf(k, l, q)*dw_dy_hypo(k, l, q) + & - q_prim_vf(strxe - 1)%sf(k, l, q)*dw_dy_hypo(k, l, q) - & - q_prim_vf(strxe)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & - q_prim_vf(strxe)%sf(k, l, q)*dw_dz_hypo(k, l, q) + & - q_prim_vf(strxe)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & - q_prim_vf(strxe)%sf(k, l, q)*dw_dz_hypo(k, l, q) + & - 2._wp*G_K_field(k, l, q)*(dw_dz_hypo(k, l, q) - (1._wp/3._wp)* & - (du_dx_hypo(k, l, q) + & - dv_dy_hypo(k, l, q) + & - dw_dz_hypo(k, l, q)))) - end do + rhs_vf(strxb + 3)%sf(k, l, q) = rhs_vf(strxb + 3)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxb + 3)%sf(k, l, q)*du_dx_hypo(k, l, q) + & + q_prim_vf(strxb)%sf(k, l, q)*dw_dx_hypo(k, l, q) - & + q_prim_vf(strxb + 3)%sf(k, l, q)*du_dx_hypo(k, l, q) + & + q_prim_vf(strxb + 4)%sf(k, l, q)*du_dy_hypo(k, l, q) + & + q_prim_vf(strxb + 1)%sf(k, l, q)*dw_dy_hypo(k, l, q) - & + q_prim_vf(strxb + 3)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & + q_prim_vf(strxb + 5)%sf(k, l, q)*du_dz_hypo(k, l, q) + & + q_prim_vf(strxb + 3)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & + q_prim_vf(strxb + 3)%sf(k, l, q)*dw_dz_hypo(k, l, q) + & + 2._wp*G_K_field(k, l, q)*(1._wp/2._wp)*(du_dz_hypo(k, l, q) + & + dw_dx_hypo(k, l, q))) + + rhs_vf(strxb + 4)%sf(k, l, q) = rhs_vf(strxb + 4)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxb + 3)%sf(k, l, q)*dv_dx_hypo(k, l, q) + & + q_prim_vf(strxb + 1)%sf(k, l, q)*dw_dx_hypo(k, l, q) - & + q_prim_vf(strxb + 4)%sf(k, l, q)*du_dx_hypo(k, l, q) + & + q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & + q_prim_vf(strxb + 2)%sf(k, l, q)*dw_dy_hypo(k, l, q) - & + q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & + q_prim_vf(strxb + 5)%sf(k, l, q)*dv_dz_hypo(k, l, q) + & + q_prim_vf(strxb + 4)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & + q_prim_vf(strxb + 4)%sf(k, l, q)*dw_dz_hypo(k, l, q) + & + 2._wp*G_K_field(k, l, q)*(1._wp/2._wp)*(dv_dz_hypo(k, l, q) + & + dw_dy_hypo(k, l, q))) + + rhs_vf(strxe)%sf(k, l, q) = rhs_vf(strxe)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxe - 2)%sf(k, l, q)*dw_dx_hypo(k, l, q) + & + q_prim_vf(strxe - 2)%sf(k, l, q)*dw_dx_hypo(k, l, q) - & + q_prim_vf(strxe)%sf(k, l, q)*du_dx_hypo(k, l, q) + & + q_prim_vf(strxe - 1)%sf(k, l, q)*dw_dy_hypo(k, l, q) + & + q_prim_vf(strxe - 1)%sf(k, l, q)*dw_dy_hypo(k, l, q) - & + q_prim_vf(strxe)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & + q_prim_vf(strxe)%sf(k, l, q)*dw_dz_hypo(k, l, q) + & + q_prim_vf(strxe)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & + q_prim_vf(strxe)%sf(k, l, q)*dw_dz_hypo(k, l, q) + & + 2._wp*G_K_field(k, l, q)*(dw_dz_hypo(k, l, q) - (1._wp/3._wp)* & + (du_dx_hypo(k, l, q) + & + dv_dy_hypo(k, l, q) + & + dw_dz_hypo(k, l, q)))) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (cyl_coord .and. idir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - ! S_xx -= rho * v/r * (tau_xx + 2/3*G) - rhs_vf(strxb)%sf(k, l, q) = rhs_vf(strxb)%sf(k, l, q) - & + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) + do q = 0, p + do l = 0, n + do k = 0, m + ! S_xx -= rho * v/r * (tau_xx + 2/3*G) + rhs_vf(strxb)%sf(k, l, q) = rhs_vf(strxb)%sf(k, l, q) - & + rho_K_field(k, l, q)*q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)* & + (q_prim_vf(strxb)%sf(k, l, q) + (2._wp/3._wp)*G_K_field(k, l, q)) ! tau_xx + 2/3*G + + ! S_xr -= rho * v/r * tau_xr + rhs_vf(strxb + 1)%sf(k, l, q) = rhs_vf(strxb + 1)%sf(k, l, q) - & rho_K_field(k, l, q)*q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)* & - (q_prim_vf(strxb)%sf(k, l, q) + (2._wp/3._wp)*G_K_field(k, l, q)) ! tau_xx + 2/3*G - - ! S_xr -= rho * v/r * tau_xr - rhs_vf(strxb + 1)%sf(k, l, q) = rhs_vf(strxb + 1)%sf(k, l, q) - & - rho_K_field(k, l, q)*q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)* & - q_prim_vf(strxb + 1)%sf(k, l, q) ! tau_xx - - ! S_rr -= rho * v/r * (tau_rr + 2/3*G) - rhs_vf(strxb + 2)%sf(k, l, q) = rhs_vf(strxb + 2)%sf(k, l, q) - & - rho_K_field(k, l, q)*q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)* & - (q_prim_vf(strxb + 2)%sf(k, l, q) + (2._wp/3._wp)*G_K_field(k, l, q)) ! tau_rr + 2/3*G - - ! S_thetatheta += rho * ( -(tau_thetatheta + 2/3*G)*(du/dx + dv/dr + v/r) + 2*(tau_thetatheta + G)*v/r ) - rhs_vf(strxb + 3)%sf(k, l, q) = rhs_vf(strxb + 3)%sf(k, l, q) + & - rho_K_field(k, l, q)*( & - -(q_prim_vf(strxb + 3)%sf(k, l, q) + (2._wp/3._wp)*G_K_field(k, l, q))* & - (du_dx_hypo(k, l, q) + dv_dy_hypo(k, l, q) + q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)) & - + 2._wp*(q_prim_vf(strxb + 3)%sf(k, l, q) + G_K_field(k, l, q))*q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)) - end do + q_prim_vf(strxb + 1)%sf(k, l, q) ! tau_xx + + ! S_rr -= rho * v/r * (tau_rr + 2/3*G) + rhs_vf(strxb + 2)%sf(k, l, q) = rhs_vf(strxb + 2)%sf(k, l, q) - & + rho_K_field(k, l, q)*q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)* & + (q_prim_vf(strxb + 2)%sf(k, l, q) + (2._wp/3._wp)*G_K_field(k, l, q)) ! tau_rr + 2/3*G + + ! S_thetatheta += rho * ( -(tau_thetatheta + 2/3*G)*(du/dx + dv/dr + v/r) + 2*(tau_thetatheta + G)*v/r ) + rhs_vf(strxb + 3)%sf(k, l, q) = rhs_vf(strxb + 3)%sf(k, l, q) + & + rho_K_field(k, l, q)*( & + -(q_prim_vf(strxb + 3)%sf(k, l, q) + (2._wp/3._wp)*G_K_field(k, l, q))* & + (du_dx_hypo(k, l, q) + dv_dy_hypo(k, l, q) + q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)) & + + 2._wp*(q_prim_vf(strxb + 3)%sf(k, l, q) + G_K_field(k, l, q))*q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if @@ -404,66 +404,66 @@ contains if (n == 0) then l = 0; q = 0 - #:call GPU_PARALLEL_LOOP() - do k = 0, m - rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(abs(q_cons_vf(stress_idx%beg)%sf(k, l, q)) - tau_star, 0._wp))**cont_damage_s - end do - #:endcall GPU_PARALLEL_LOOP + $:GPU_PARALLEL_LOOP(private='[k]', copyin='[l,q]') + do k = 0, m + rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(abs(q_cons_vf(stress_idx%beg)%sf(k, l, q)) - tau_star, 0._wp))**cont_damage_s + end do + $:END_GPU_PARALLEL_LOOP() elseif (p == 0) then q = 0 - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[k,l]', copyin='[q]', collapse=2) + do l = 0, n + do k = 0, m + ! Maximum principal stress + tau_p = 0.5_wp*(q_cons_vf(stress_idx%beg)%sf(k, l, q) + & + q_cons_vf(stress_idx%beg + 2)%sf(k, l, q)) + & + sqrt((q_cons_vf(stress_idx%beg)%sf(k, l, q) - & + q_cons_vf(stress_idx%beg + 2)%sf(k, l, q))**2.0_wp + & + 4._wp*q_cons_vf(stress_idx%beg + 1)%sf(k, l, q)**2.0_wp)/2._wp + + rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(tau_p - tau_star, 0._wp))**cont_damage_s + end do + end do + $:END_GPU_PARALLEL_LOOP() + else + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) + do q = 0, p do l = 0, n do k = 0, m + tau_xx = q_cons_vf(stress_idx%beg)%sf(k, l, q) + tau_xy = q_cons_vf(stress_idx%beg + 1)%sf(k, l, q) + tau_yy = q_cons_vf(stress_idx%beg + 2)%sf(k, l, q) + tau_xz = q_cons_vf(stress_idx%beg + 3)%sf(k, l, q) + tau_yz = q_cons_vf(stress_idx%beg + 4)%sf(k, l, q) + tau_zz = q_cons_vf(stress_idx%beg + 5)%sf(k, l, q) + + ! Invariants of the stress tensor + I1 = tau_xx + tau_yy + tau_zz + I2 = tau_xx*tau_yy + tau_xx*tau_zz + tau_yy*tau_zz - & + (tau_xy**2.0_wp + tau_xz**2.0_wp + tau_yz**2.0_wp) + I3 = tau_xx*tau_yy*tau_zz + 2.0_wp*tau_xy*tau_xz*tau_yz - & + tau_xx*tau_yz**2.0_wp - tau_yy*tau_xz**2.0_wp - tau_zz*tau_xy**2.0_wp + ! Maximum principal stress - tau_p = 0.5_wp*(q_cons_vf(stress_idx%beg)%sf(k, l, q) + & - q_cons_vf(stress_idx%beg + 2)%sf(k, l, q)) + & - sqrt((q_cons_vf(stress_idx%beg)%sf(k, l, q) - & - q_cons_vf(stress_idx%beg + 2)%sf(k, l, q))**2.0_wp + & - 4._wp*q_cons_vf(stress_idx%beg + 1)%sf(k, l, q)**2.0_wp)/2._wp + temp = I1**2.0_wp - 3.0_wp*I2 + sqrt_term_1 = sqrt(max(temp, 0.0_wp)) + if (sqrt_term_1 > verysmall) then ! Avoid 0/0 + argument = (2.0_wp*I1*I1*I1 - 9.0_wp*I1*I2 + 27.0_wp*I3)/ & + (2.0_wp*sqrt_term_1*sqrt_term_1*sqrt_term_1) + if (argument > 1.0_wp) argument = 1.0_wp + if (argument < -1.0_wp) argument = -1.0_wp + phi = acos(argument) + sqrt_term_2 = sqrt(max(I1**2.0_wp - 3.0_wp*I2, 0.0_wp)) + tau_p = I1/3.0_wp + 2.0_wp/sqrt(3.0_wp)*sqrt_term_2*cos(phi/3.0_wp) + else + tau_p = I1/3.0_wp + end if rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(tau_p - tau_star, 0._wp))**cont_damage_s end do end do - #:endcall GPU_PARALLEL_LOOP - else - #:call GPU_PARALLEL_LOOP(collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - tau_xx = q_cons_vf(stress_idx%beg)%sf(k, l, q) - tau_xy = q_cons_vf(stress_idx%beg + 1)%sf(k, l, q) - tau_yy = q_cons_vf(stress_idx%beg + 2)%sf(k, l, q) - tau_xz = q_cons_vf(stress_idx%beg + 3)%sf(k, l, q) - tau_yz = q_cons_vf(stress_idx%beg + 4)%sf(k, l, q) - tau_zz = q_cons_vf(stress_idx%beg + 5)%sf(k, l, q) - - ! Invariants of the stress tensor - I1 = tau_xx + tau_yy + tau_zz - I2 = tau_xx*tau_yy + tau_xx*tau_zz + tau_yy*tau_zz - & - (tau_xy**2.0_wp + tau_xz**2.0_wp + tau_yz**2.0_wp) - I3 = tau_xx*tau_yy*tau_zz + 2.0_wp*tau_xy*tau_xz*tau_yz - & - tau_xx*tau_yz**2.0_wp - tau_yy*tau_xz**2.0_wp - tau_zz*tau_xy**2.0_wp - - ! Maximum principal stress - temp = I1**2.0_wp - 3.0_wp*I2 - sqrt_term_1 = sqrt(max(temp, 0.0_wp)) - if (sqrt_term_1 > verysmall) then ! Avoid 0/0 - argument = (2.0_wp*I1*I1*I1 - 9.0_wp*I1*I2 + 27.0_wp*I3)/ & - (2.0_wp*sqrt_term_1*sqrt_term_1*sqrt_term_1) - if (argument > 1.0_wp) argument = 1.0_wp - if (argument < -1.0_wp) argument = -1.0_wp - phi = acos(argument) - sqrt_term_2 = sqrt(max(I1**2.0_wp - 3.0_wp*I2, 0.0_wp)) - tau_p = I1/3.0_wp + 2.0_wp/sqrt(3.0_wp)*sqrt_term_2*cos(phi/3.0_wp) - else - tau_p = I1/3.0_wp - end if - - rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(tau_p - tau_star, 0._wp))**cont_damage_s - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_compute_damage_state diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 1a159c74d6..3fc11a74b4 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -197,193 +197,193 @@ contains type(ghost_point) :: gp type(ghost_point) :: innerp if (num_gps > 0) then - #:call GPU_PARALLEL_LOOP(private='[physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,pres_IP,vel_IP,vel_g,vel_norm_IP,r_IP, v_IP,pb_IP,mv_IP,nmom_IP,presb_IP,massv_IP,rho, gamma,pi_inf,Re_K,G_K,Gs,gp,innerp,norm,buf, radial_vector, rotation_velocity, j,k,l,q]') - do i = 1, num_gps - - gp = ghost_points(i) - j = gp%loc(1) - k = gp%loc(2) - l = gp%loc(3) - patch_id = ghost_points(i)%ib_patch_id - - ! Calculate physical location of GP - if (p > 0) then - physical_loc = [x_cc(j), y_cc(k), z_cc(l)] + $:GPU_PARALLEL_LOOP(private='[i,physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,pres_IP,vel_IP,vel_g,vel_norm_IP,r_IP, v_IP,pb_IP,mv_IP,nmom_IP,presb_IP,massv_IP,rho, gamma,pi_inf,Re_K,G_K,Gs,gp,innerp,norm,buf, radial_vector, rotation_velocity, j,k,l,q]') + do i = 1, num_gps + + gp = ghost_points(i) + j = gp%loc(1) + k = gp%loc(2) + l = gp%loc(3) + patch_id = ghost_points(i)%ib_patch_id + + ! Calculate physical location of GP + if (p > 0) then + physical_loc = [x_cc(j), y_cc(k), z_cc(l)] + else + physical_loc = [x_cc(j), y_cc(k), 0._wp] + end if + + !Interpolate primitive variables at image point associated w/ GP + if (bubbles_euler .and. .not. qbmm) then + call s_interpolate_image_point(q_prim_vf, gp, & + alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & + r_IP, v_IP, pb_IP, mv_IP) + else if (qbmm .and. polytropic) then + call s_interpolate_image_point(q_prim_vf, gp, & + alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & + r_IP, v_IP, pb_IP, mv_IP, nmom_IP) + else if (qbmm .and. .not. polytropic) then + call s_interpolate_image_point(q_prim_vf, gp, & + alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & + r_IP, v_IP, pb_IP, mv_IP, nmom_IP, pb_in, mv_in, presb_IP, massv_IP) + else + call s_interpolate_image_point(q_prim_vf, gp, & + alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP) + end if + dyn_pres = 0._wp + + ! Set q_prim_vf params at GP so that mixture vars calculated properly + $:GPU_LOOP(parallelism='[seq]') + do q = 1, num_fluids + q_prim_vf(q)%sf(j, k, l) = alpha_rho_IP(q) + q_prim_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) + end do + + if (surface_tension) then + q_prim_vf(c_idx)%sf(j, k, l) = c_IP + end if + if (model_eqns /= 4) then + ! If in simulation, use acc mixture subroutines + if (elasticity) then + call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & + alpha_rho_IP, Re_K, G_K, Gs) + else if (bubbles_euler) then + call s_convert_species_to_mixture_variables_bubbles_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & + alpha_rho_IP, Re_K) else - physical_loc = [x_cc(j), y_cc(k), 0._wp] + call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & + alpha_rho_IP, Re_K) end if + end if - !Interpolate primitive variables at image point associated w/ GP - if (bubbles_euler .and. .not. qbmm) then - call s_interpolate_image_point(q_prim_vf, gp, & - alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & - r_IP, v_IP, pb_IP, mv_IP) - else if (qbmm .and. polytropic) then - call s_interpolate_image_point(q_prim_vf, gp, & - alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & - r_IP, v_IP, pb_IP, mv_IP, nmom_IP) - else if (qbmm .and. .not. polytropic) then - call s_interpolate_image_point(q_prim_vf, gp, & - alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & - r_IP, v_IP, pb_IP, mv_IP, nmom_IP, pb_in, mv_in, presb_IP, massv_IP) + ! Calculate velocity of ghost cell + if (gp%slip) then + norm(1:3) = levelset_norm%sf(gp%loc(1), gp%loc(2), gp%loc(3), gp%ib_patch_id, 1:3) + buf = sqrt(sum(norm**2)) + norm = norm/buf + vel_norm_IP = sum(vel_IP*norm)*norm + vel_g = vel_IP - vel_norm_IP + if (patch_ib(patch_id)%moving_ibm /= 0) then + ! compute the linear velocity of the ghost point due to rotation + radial_vector = physical_loc - [patch_ib(patch_id)%x_centroid, & + patch_ib(patch_id)%y_centroid, patch_ib(patch_id)%z_centroid] + rotation_velocity = cross_product(matmul(patch_ib(patch_id)%rotation_matrix, patch_ib(patch_id)%angular_vel), radial_vector) + + ! add only the component of the IB's motion that is normal to the surface + vel_g = vel_g + sum((patch_ib(patch_id)%vel + rotation_velocity)*norm)*norm + end if + else + if (patch_ib(patch_id)%moving_ibm == 0) then + ! we know the object is not moving if moving_ibm is 0 (false) + vel_g = 0._wp else - call s_interpolate_image_point(q_prim_vf, gp, & - alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP) + ! get the vector that points from the centroid to the ghost + radial_vector = physical_loc - [patch_ib(patch_id)%x_centroid, & + patch_ib(patch_id)%y_centroid, patch_ib(patch_id)%z_centroid] + ! convert the angular velocity from the inertial reference frame to the fluids frame, then convert to linear velocity + rotation_velocity = cross_product(matmul(patch_ib(patch_id)%rotation_matrix, patch_ib(patch_id)%angular_vel), radial_vector) + do q = 1, 3 + ! if mibm is 1 or 2, then the boundary may be moving + vel_g(q) = patch_ib(patch_id)%vel(q) ! add the linear velocity + vel_g(q) = vel_g(q) + rotation_velocity(q) ! add the rotational velocity + end do end if - dyn_pres = 0._wp + end if - ! Set q_prim_vf params at GP so that mixture vars calculated properly - $:GPU_LOOP(parallelism='[seq]') - do q = 1, num_fluids - q_prim_vf(q)%sf(j, k, l) = alpha_rho_IP(q) - q_prim_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) - end do + ! Set momentum + $:GPU_LOOP(parallelism='[seq]') + do q = momxb, momxe + q_cons_vf(q)%sf(j, k, l) = rho*vel_g(q - momxb + 1) + dyn_pres = dyn_pres + q_cons_vf(q)%sf(j, k, l)* & + vel_g(q - momxb + 1)/2._wp + end do - if (surface_tension) then - q_prim_vf(c_idx)%sf(j, k, l) = c_IP - end if - if (model_eqns /= 4) then - ! If in simulation, use acc mixture subroutines - if (elasticity) then - call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & - alpha_rho_IP, Re_K, G_K, Gs) - else if (bubbles_euler) then - call s_convert_species_to_mixture_variables_bubbles_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & - alpha_rho_IP, Re_K) - else - call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & - alpha_rho_IP, Re_K) - end if - end if + ! Set continuity and adv vars + $:GPU_LOOP(parallelism='[seq]') + do q = 1, num_fluids + q_cons_vf(q)%sf(j, k, l) = alpha_rho_IP(q) + q_cons_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) + end do - ! Calculate velocity of ghost cell - if (gp%slip) then - norm(1:3) = levelset_norm%sf(gp%loc(1), gp%loc(2), gp%loc(3), gp%ib_patch_id, 1:3) - buf = sqrt(sum(norm**2)) - norm = norm/buf - vel_norm_IP = sum(vel_IP*norm)*norm - vel_g = vel_IP - vel_norm_IP - if (patch_ib(patch_id)%moving_ibm /= 0) then - ! compute the linear velocity of the ghost point due to rotation - radial_vector = physical_loc - [patch_ib(patch_id)%x_centroid, & - patch_ib(patch_id)%y_centroid, patch_ib(patch_id)%z_centroid] - rotation_velocity = cross_product(matmul(patch_ib(patch_id)%rotation_matrix, patch_ib(patch_id)%angular_vel), radial_vector) - - ! add only the component of the IB's motion that is normal to the surface - vel_g = vel_g + sum((patch_ib(patch_id)%vel + rotation_velocity)*norm)*norm - end if - else - if (patch_ib(patch_id)%moving_ibm == 0) then - ! we know the object is not moving if moving_ibm is 0 (false) - vel_g = 0._wp - else - ! get the vector that points from the centroid to the ghost - radial_vector = physical_loc - [patch_ib(patch_id)%x_centroid, & - patch_ib(patch_id)%y_centroid, patch_ib(patch_id)%z_centroid] - ! convert the angular velocity from the inertial reference frame to the fluids frame, then convert to linear velocity - rotation_velocity = cross_product(matmul(patch_ib(patch_id)%rotation_matrix, patch_ib(patch_id)%angular_vel), radial_vector) - do q = 1, 3 - ! if mibm is 1 or 2, then the boundary may be moving - vel_g(q) = patch_ib(patch_id)%vel(q) ! add the linear velocity - vel_g(q) = vel_g(q) + rotation_velocity(q) ! add the rotational velocity - end do - end if - end if + ! Set color function + if (surface_tension) then + q_cons_vf(c_idx)%sf(j, k, l) = c_IP + end if - ! Set momentum + ! Set Energy + if (bubbles_euler) then + q_cons_vf(E_idx)%sf(j, k, l) = (1 - alpha_IP(1))*(gamma*pres_IP + pi_inf + dyn_pres) + else + q_cons_vf(E_idx)%sf(j, k, l) = gamma*pres_IP + pi_inf + dyn_pres + end if + ! Set bubble vars + if (bubbles_euler .and. .not. qbmm) then + call s_comp_n_from_prim(alpha_IP(1), r_IP, nbub, weight) $:GPU_LOOP(parallelism='[seq]') - do q = momxb, momxe - q_cons_vf(q)%sf(j, k, l) = rho*vel_g(q - momxb + 1) - dyn_pres = dyn_pres + q_cons_vf(q)%sf(j, k, l)* & - vel_g(q - momxb + 1)/2._wp + do q = 1, nb + q_cons_vf(bubxb + (q - 1)*2)%sf(j, k, l) = nbub*r_IP(q) + q_cons_vf(bubxb + (q - 1)*2 + 1)%sf(j, k, l) = nbub*v_IP(q) + if (.not. polytropic) then + q_cons_vf(bubxb + (q - 1)*4)%sf(j, k, l) = nbub*r_IP(q) + q_cons_vf(bubxb + (q - 1)*4 + 1)%sf(j, k, l) = nbub*v_IP(q) + q_cons_vf(bubxb + (q - 1)*4 + 2)%sf(j, k, l) = nbub*pb_IP(q) + q_cons_vf(bubxb + (q - 1)*4 + 3)%sf(j, k, l) = nbub*mv_IP(q) + end if end do + end if + + if (qbmm) then - ! Set continuity and adv vars + nbub = nmom_IP(1) $:GPU_LOOP(parallelism='[seq]') - do q = 1, num_fluids - q_cons_vf(q)%sf(j, k, l) = alpha_rho_IP(q) - q_cons_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) + do q = 1, nb*nmom + q_cons_vf(bubxb + q - 1)%sf(j, k, l) = nbub*nmom_IP(q) end do - ! Set color function - if (surface_tension) then - q_cons_vf(c_idx)%sf(j, k, l) = c_IP - end if - - ! Set Energy - if (bubbles_euler) then - q_cons_vf(E_idx)%sf(j, k, l) = (1 - alpha_IP(1))*(gamma*pres_IP + pi_inf + dyn_pres) - else - q_cons_vf(E_idx)%sf(j, k, l) = gamma*pres_IP + pi_inf + dyn_pres - end if - ! Set bubble vars - if (bubbles_euler .and. .not. qbmm) then - call s_comp_n_from_prim(alpha_IP(1), r_IP, nbub, weight) - $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb - q_cons_vf(bubxb + (q - 1)*2)%sf(j, k, l) = nbub*r_IP(q) - q_cons_vf(bubxb + (q - 1)*2 + 1)%sf(j, k, l) = nbub*v_IP(q) - if (.not. polytropic) then - q_cons_vf(bubxb + (q - 1)*4)%sf(j, k, l) = nbub*r_IP(q) - q_cons_vf(bubxb + (q - 1)*4 + 1)%sf(j, k, l) = nbub*v_IP(q) - q_cons_vf(bubxb + (q - 1)*4 + 2)%sf(j, k, l) = nbub*pb_IP(q) - q_cons_vf(bubxb + (q - 1)*4 + 3)%sf(j, k, l) = nbub*mv_IP(q) - end if - end do - end if - - if (qbmm) then - - nbub = nmom_IP(1) - $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb*nmom - q_cons_vf(bubxb + q - 1)%sf(j, k, l) = nbub*nmom_IP(q) - end do + $:GPU_LOOP(parallelism='[seq]') + do q = 1, nb + q_cons_vf(bubxb + (q - 1)*nmom)%sf(j, k, l) = nbub + end do + if (.not. polytropic) then $:GPU_LOOP(parallelism='[seq]') do q = 1, nb - q_cons_vf(bubxb + (q - 1)*nmom)%sf(j, k, l) = nbub - end do - - if (.not. polytropic) then $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb - $:GPU_LOOP(parallelism='[seq]') - do r = 1, nnode - pb_in(j, k, l, r, q) = presb_IP((q - 1)*nnode + r) - mv_in(j, k, l, r, q) = massv_IP((q - 1)*nnode + r) - end do + do r = 1, nnode + pb_in(j, k, l, r, q) = presb_IP((q - 1)*nnode + r) + mv_in(j, k, l, r, q) = massv_IP((q - 1)*nnode + r) end do - end if - end if - - if (model_eqns == 3) then - $:GPU_LOOP(parallelism='[seq]') - do q = intxb, intxe - q_cons_vf(q)%sf(j, k, l) = alpha_IP(q - intxb + 1)*(gammas(q - intxb + 1)*pres_IP & - + pi_infs(q - intxb + 1)) end do end if - end do - #:endcall GPU_PARALLEL_LOOP + end if + + if (model_eqns == 3) then + $:GPU_LOOP(parallelism='[seq]') + do q = intxb, intxe + q_cons_vf(q)%sf(j, k, l) = alpha_IP(q - intxb + 1)*(gammas(q - intxb + 1)*pres_IP & + + pi_infs(q - intxb + 1)) + end do + end if + end do + $:END_GPU_PARALLEL_LOOP() end if !Correct the state of the inner points in IBs if (num_inner_gps > 0) then - #:call GPU_PARALLEL_LOOP(private='[physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,vel_g,rho,gamma,pi_inf,Re_K,innerp,j,k,l,q]') - do i = 1, num_inner_gps + $:GPU_PARALLEL_LOOP(private='[i,physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,vel_g,rho,gamma,pi_inf,Re_K,innerp,j,k,l,q]') + do i = 1, num_inner_gps - innerp = inner_points(i) - j = innerp%loc(1) - k = innerp%loc(2) - l = innerp%loc(3) + innerp = inner_points(i) + j = innerp%loc(1) + k = innerp%loc(2) + l = innerp%loc(3) - $:GPU_LOOP(parallelism='[seq]') - do q = momxb, momxe - q_cons_vf(q)%sf(j, k, l) = 0._wp - end do + $:GPU_LOOP(parallelism='[seq]') + do q = momxb, momxe + q_cons_vf(q)%sf(j, k, l) = 0._wp end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_ibm_correct_state diff --git a/src/simulation/m_igr.fpp b/src/simulation/m_igr.fpp index 599fe2cc7b..9126d1cf73 100644 --- a/src/simulation/m_igr.fpp +++ b/src/simulation/m_igr.fpp @@ -162,16 +162,16 @@ contains end if #endif - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - jac(j, k, l) = 0._wp - if (igr_iter_solver == 1) jac_old(j, k, l) = 0._wp - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + jac(j, k, l) = 0._wp + if (igr_iter_solver == 1) jac_old(j, k, l) = 0._wp end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (p == 0) then alf_igr = alf_factor*max(dx(1), dy(1))**2._wp @@ -244,83 +244,83 @@ contains end if do q = 1, num_iters - #:call GPU_PARALLEL_LOOP(collapse=3, private='[rho_lx, rho_rx, rho_ly, rho_ry, rho_lz, rho_rz, fd_coeff]') - do l = 0, p - do k = 0, n - do j = 0, m - rho_lx = 0._wp - rho_rx = 0._wp - rho_ly = 0._wp - rho_ry = 0._wp - rho_lz = 0._wp - rho_rz = 0._wp - fd_coeff = 0._wp + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_lx, rho_rx, rho_ly, rho_ry, rho_lz, rho_rz, fd_coeff]') + do l = 0, p + do k = 0, n + do j = 0, m + rho_lx = 0._wp + rho_rx = 0._wp + rho_ly = 0._wp + rho_ry = 0._wp + rho_lz = 0._wp + rho_rz = 0._wp + fd_coeff = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_lx = rho_lx + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j - 1, k, l))/2._wp - rho_rx = rho_rx + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j + 1, k, l))/2._wp - rho_ly = rho_ly + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k - 1, l))/2._wp - rho_ry = rho_ry + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k + 1, l))/2._wp - if (p > 0) then - rho_lz = rho_lz + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k, l - 1))/2._wp - rho_rz = rho_rz + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k, l + 1))/2._wp - end if - fd_coeff = fd_coeff + q_cons_vf(i)%sf(j, k, l) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_lx = rho_lx + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j - 1, k, l))/2._wp + rho_rx = rho_rx + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j + 1, k, l))/2._wp + rho_ly = rho_ly + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k - 1, l))/2._wp + rho_ry = rho_ry + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k + 1, l))/2._wp + if (p > 0) then + rho_lz = rho_lz + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k, l - 1))/2._wp + rho_rz = rho_rz + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k, l + 1))/2._wp + end if + fd_coeff = fd_coeff + q_cons_vf(i)%sf(j, k, l) + end do - fd_coeff = 1._wp/fd_coeff + alf_igr* & - ((1._wp/dx(j)**2._wp)*(1._wp/rho_lx + 1._wp/rho_rx) + & - (1._wp/dy(k)**2._wp)*(1._wp/rho_ly + 1._wp/rho_ry)) + fd_coeff = 1._wp/fd_coeff + alf_igr* & + ((1._wp/dx(j)**2._wp)*(1._wp/rho_lx + 1._wp/rho_rx) + & + (1._wp/dy(k)**2._wp)*(1._wp/rho_ly + 1._wp/rho_ry)) + if (num_dims == 3) then + fd_coeff = fd_coeff + alf_igr*(1._wp/dz(l)**2._wp)*(1._wp/rho_lz + 1._wp/rho_rz) + end if + + if (igr_iter_solver == 1) then ! Jacobi iteration if (num_dims == 3) then - fd_coeff = fd_coeff + alf_igr*(1._wp/dz(l)**2._wp)*(1._wp/rho_lz + 1._wp/rho_rz) + jac(j, k, l) = (alf_igr/fd_coeff)* & + ((1._wp/dx(j)**2._wp)*(jac_old(j - 1, k, l)/rho_lx + jac_old(j + 1, k, l)/rho_rx) + & + (1._wp/dy(k)**2._wp)*(jac_old(j, k - 1, l)/rho_ly + jac_old(j, k + 1, l)/rho_ry) + & + (1._wp/dz(l)**2._wp)*(jac_old(j, k, l - 1)/rho_lz + jac_old(j, k, l + 1)/rho_rz)) + & + jac_rhs(j, k, l)/fd_coeff + else + jac(j, k, l) = (alf_igr/fd_coeff)* & + ((1._wp/dx(j)**2._wp)*(jac_old(j - 1, k, l)/rho_lx + jac_old(j + 1, k, l)/rho_rx) + & + (1._wp/dy(k)**2._wp)*(jac_old(j, k - 1, l)/rho_ly + jac_old(j, k + 1, l)/rho_ry)) + & + jac_rhs(j, k, l)/fd_coeff end if - - if (igr_iter_solver == 1) then ! Jacobi iteration - if (num_dims == 3) then - jac(j, k, l) = (alf_igr/fd_coeff)* & - ((1._wp/dx(j)**2._wp)*(jac_old(j - 1, k, l)/rho_lx + jac_old(j + 1, k, l)/rho_rx) + & - (1._wp/dy(k)**2._wp)*(jac_old(j, k - 1, l)/rho_ly + jac_old(j, k + 1, l)/rho_ry) + & - (1._wp/dz(l)**2._wp)*(jac_old(j, k, l - 1)/rho_lz + jac_old(j, k, l + 1)/rho_rz)) + & - jac_rhs(j, k, l)/fd_coeff - else - jac(j, k, l) = (alf_igr/fd_coeff)* & - ((1._wp/dx(j)**2._wp)*(jac_old(j - 1, k, l)/rho_lx + jac_old(j + 1, k, l)/rho_rx) + & - (1._wp/dy(k)**2._wp)*(jac_old(j, k - 1, l)/rho_ly + jac_old(j, k + 1, l)/rho_ry)) + & - jac_rhs(j, k, l)/fd_coeff - end if - else ! Gauss Seidel iteration - if (num_dims == 3) then - jac(j, k, l) = (alf_igr/fd_coeff)* & - ((1._wp/dx(j)**2._wp)*(jac(j - 1, k, l)/rho_lx + jac(j + 1, k, l)/rho_rx) + & - (1._wp/dy(k)**2._wp)*(jac(j, k - 1, l)/rho_ly + jac(j, k + 1, l)/rho_ry) + & - (1._wp/dz(l)**2._wp)*(jac(j, k, l - 1)/rho_lz + jac(j, k, l + 1)/rho_rz)) + & - jac_rhs(j, k, l)/fd_coeff - else - jac(j, k, l) = (alf_igr/fd_coeff)* & - ((1._wp/dx(j)**2._wp)*(jac(j - 1, k, l)/rho_lx + jac(j + 1, k, l)/rho_rx) + & - (1._wp/dy(k)**2._wp)*(jac(j, k - 1, l)/rho_ly + jac(j, k + 1, l)/rho_ry)) + & - jac_rhs(j, k, l)/fd_coeff - end if + else ! Gauss Seidel iteration + if (num_dims == 3) then + jac(j, k, l) = (alf_igr/fd_coeff)* & + ((1._wp/dx(j)**2._wp)*(jac(j - 1, k, l)/rho_lx + jac(j + 1, k, l)/rho_rx) + & + (1._wp/dy(k)**2._wp)*(jac(j, k - 1, l)/rho_ly + jac(j, k + 1, l)/rho_ry) + & + (1._wp/dz(l)**2._wp)*(jac(j, k, l - 1)/rho_lz + jac(j, k, l + 1)/rho_rz)) + & + jac_rhs(j, k, l)/fd_coeff + else + jac(j, k, l) = (alf_igr/fd_coeff)* & + ((1._wp/dx(j)**2._wp)*(jac(j - 1, k, l)/rho_lx + jac(j + 1, k, l)/rho_rx) + & + (1._wp/dy(k)**2._wp)*(jac(j, k - 1, l)/rho_ly + jac(j, k + 1, l)/rho_ry)) + & + jac_rhs(j, k, l)/fd_coeff end if - end do + end if end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() call s_populate_F_igr_buffers(bc_type, jac_sf) if (igr_iter_solver == 1) then ! Jacobi iteration - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - jac_old(j, k, l) = jac(j, k, l) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + jac_old(j, k, l) = jac(j, k, l) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end do @@ -340,58 +340,58 @@ contains real(wp) :: F_L, vel_L, rho_L, F_R, vel_R, rho_R real(wp), dimension(num_fluids) :: alpha_rho_L, alpha_rho_R - #:call GPU_PARALLEL_LOOP(collapse=3, private='[F_L, vel_L, alpha_rho_L, F_R, vel_R, alpha_rho_R]') - do l = 0, p - do k = 0, n - do j = -1, m + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,F_L, vel_L, alpha_rho_L, F_R, vel_R, alpha_rho_R]') + do l = 0, p + do k = 0, n + do j = -1, m - F_L = 0._wp; F_R = 0._wp - alpha_rho_L = 0._wp; alpha_rho_R = 0._wp - vel_L = 0._wp; vel_R = 0._wp + F_L = 0._wp; F_R = 0._wp + alpha_rho_L = 0._wp; alpha_rho_R = 0._wp + vel_L = 0._wp; vel_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j + q, k, l) - end do - - vel_L = vel_L + coeff_L(q)*q_cons_vf(momxb)%sf(j + q, k, l) - F_L = F_L + coeff_L(q)*jac(j + q, k, l) + do i = 1, num_fluids + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j + q, k, l) end do - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j + q, k, l) - end do + vel_L = vel_L + coeff_L(q)*q_cons_vf(momxb)%sf(j + q, k, l) + F_L = F_L + coeff_L(q)*jac(j + q, k, l) + end do - vel_R = vel_R + coeff_R(q)*q_cons_vf(momxb)%sf(j + q, k, l) - F_R = F_R + coeff_R(q)*jac(j + q, k, l) + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j + q, k, l) end do - vel_L = vel_L/sum(alpha_rho_L) - vel_R = vel_R/sum(alpha_rho_R) - - #:for LR in ['L', 'R'] - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & - 0.5_wp*F_${LR}$*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & - 0.5_wp*vel_${LR}$*F_${LR}$*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - 0.5_wp*F_${LR}$*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - 0.5_wp*vel_${LR}$*F_${LR}$*(1._wp/dx(j)) - #:endfor + vel_R = vel_R + coeff_R(q)*q_cons_vf(momxb)%sf(j + q, k, l) + F_R = F_R + coeff_R(q)*jac(j + q, k, l) end do + + vel_L = vel_L/sum(alpha_rho_L) + vel_R = vel_R/sum(alpha_rho_R) + + #:for LR in ['L', 'R'] + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & + 0.5_wp*F_${LR}$*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & + 0.5_wp*vel_${LR}$*F_${LR}$*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + 0.5_wp*F_${LR}$*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + 0.5_wp*vel_${LR}$*F_${LR}$*(1._wp/dx(j)) + #:endfor end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_igr_sigma_x @@ -419,1688 +419,1231 @@ contains if (idir == 1) then if (p == 0) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') - do l = 0, p - do k = 0, n - do j = -1, m + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') + do l = 0, p + do k = 0, n + do j = -1, m - dvel = 0._wp - vflux_L_arr = 0._wp - vflux_R_arr = 0._wp + dvel = 0._wp + vflux_L_arr = 0._wp + vflux_R_arr = 0._wp - #:if MFC_CASE_OPTIMIZATION - #:if igr_order == 5 - !DIR$ unroll 6 - #:elif igr_order == 3 - !DIR$ unroll 4 - #:endif + #:if MFC_CASE_OPTIMIZATION + #:if igr_order == 5 + !DIR$ unroll 6 + #:elif igr_order == 3 + !DIR$ unroll 4 #:endif + #:endif + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe + dvel_small = 0._wp + !x-direction contributions $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - dvel_small = 0._wp - !x-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + i + q, k, l) - end do - rho_sf_small(i) = rho_L - end do - - dvel_small(1) = (1/(2._wp*dx(j)))*( & - 1._wp*q_cons_vf(momxb)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & - 1._wp*q_cons_vf(momxb)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 1)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) - - if (q == 0) dvel(:, 1) = dvel_small - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(1))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(1))/3._wp - end if - - !y-direction contributions + do i = -1, 1 + rho_L = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + q, k + i, l) - end do - rho_sf_small(i) = rho_L + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + i + q, k, l) end do - - dvel_small(1) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb)%sf(j + q, k + 1, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j + q, k - 1, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 1)%sf(j + q, k + 1, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j + q, k - 1, l)/rho_sf_small(-1)) - - if (q == 0) dvel(:, 2) = dvel_small - - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(2))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(2))/3._wp - end if - - if (q == 0) then - jac_rhs(j, k, l) = alf_igr*(2._wp*(dvel(1, 2)*dvel(2, 1)) & - + dvel(1, 1)**2._wp + dvel(2, 2)**2._wp & - + (dvel(1, 1) + dvel(2, 2))**2._wp) - end if + rho_sf_small(i) = rho_L end do - alpha_rho_L = 0._wp; alpha_rho_R = 0._wp - alpha_L = 0._wp; alpha_R = 0._wp - vel_L = 0._wp; vel_R = 0._wp + dvel_small(1) = (1/(2._wp*dx(j)))*( & + 1._wp*q_cons_vf(momxb)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & + 1._wp*q_cons_vf(momxb)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 1)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) + + if (q == 0) dvel(:, 1) = dvel_small + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(1))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(1))/3._wp + end if + !y-direction contributions $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j + q, k, l) - end do - - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) - end do - else - alpha_L(1) = 1._wp - end if - + do i = -1, 1 + rho_L = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + q, k + i, l) end do + rho_sf_small(i) = rho_L end do - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j + q, k, l) - end do - - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) - end do - else - alpha_R(1) = 1._wp - end if + dvel_small(1) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb)%sf(j + q, k + 1, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j + q, k - 1, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 1)%sf(j + q, k + 1, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j + q, k - 1, l)/rho_sf_small(-1)) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) - end do - end do + if (q == 0) dvel(:, 2) = dvel_small - if (num_fluids > 1) then - alpha_L(num_fluids) = 1._wp - sum(alpha_L(1:num_fluids - 1)) - alpha_R(num_fluids) = 1._wp - sum(alpha_R(1:num_fluids - 1)) + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(2))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(2))/3._wp end if - rho_L = sum(alpha_rho_L) - gamma_L = sum(alpha_L*gammas) - pi_inf_L = sum(alpha_L*pi_infs) + if (q == 0) then + jac_rhs(j, k, l) = alf_igr*(2._wp*(dvel(1, 2)*dvel(2, 1)) & + + dvel(1, 1)**2._wp + dvel(2, 2)**2._wp & + + (dvel(1, 1) + dvel(2, 2))**2._wp) + end if + end do - rho_R = sum(alpha_rho_R) - gamma_R = sum(alpha_R*gammas) - pi_inf_R = sum(alpha_R*pi_infs) + alpha_rho_L = 0._wp; alpha_rho_R = 0._wp + alpha_L = 0._wp; alpha_R = 0._wp + vel_L = 0._wp; vel_R = 0._wp - vel_L = vel_L/rho_L - vel_R = vel_R/rho_R + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j + q, k, l) + end do - if (viscous) then - mu_L = 0._wp; mu_R = 0._wp + if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - mu_L = alpha_L(i)/Res_igr(1, i) + mu_L - mu_R = alpha_R(i)/Res_igr(1, i) + mu_R + do i = 1, num_fluids - 1 + alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) end do - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j)) + else + alpha_L(1) = 1._wp end if - E_L = 0._wp; E_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j + q, k, l) + do i = 1, num_dims + vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) end do + end do + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j + q, k, l) - end do - - call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & - E_R, gamma_R, pi_inf_R, rho_R, vel_R, & - pres_L, pres_R, cfl) - do i = 1, num_fluids - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & - (0.5_wp*(alpha_rho_L(i)* & - vel_L(1))*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - (0.5_wp*(alpha_rho_L(i)* & - vel_L(1))*(1._wp/dx(j)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dx(j))) + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j + q, k, l) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & - (0.5_wp*(alpha_L(i)* & - vel_L(1))*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_L(1)*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - (0.5_wp*(alpha_L(i)* & - vel_L(1))*(1._wp/dx(j)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dx(j))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(1)*(1._wp/dx(j))) + alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) end do + else + alpha_R(1) = 1._wp end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & - (0.5_wp*(rho_L*(vel_L(1))**2.0 + & - pres_L)*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & - (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dx(j + 1))) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + end do + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & - (0.5_wp*(vel_L(1)*(E_L + & - pres_L))*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dx(j + 1))) + if (num_fluids > 1) then + alpha_L(num_fluids) = 1._wp - sum(alpha_L(1:num_fluids - 1)) + alpha_R(num_fluids) = 1._wp - sum(alpha_R(1:num_fluids - 1)) + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*(rho_L*(vel_L(1))**2.0 + & - pres_L)*(1._wp/dx(j)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dx(j))) + rho_L = sum(alpha_rho_L) + gamma_L = sum(alpha_L*gammas) + pi_inf_L = sum(alpha_L*pi_infs) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dx(j))) + rho_R = sum(alpha_rho_R) + gamma_R = sum(alpha_R*gammas) + pi_inf_R = sum(alpha_R*pi_infs) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_L(1)*(E_L + & - pres_L))*(1._wp/dx(j)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dx(j))) + vel_L = vel_L/rho_L + vel_R = vel_R/rho_R + if (viscous) then + mu_L = 0._wp; mu_R = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & - (0.5_wp*(alpha_rho_R(i)* & - vel_R(1))*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - (0.5_wp*(alpha_rho_R(i)* & - vel_R(1))*(1._wp/dx(j)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dx(j))) + mu_L = alpha_L(i)/Res_igr(1, i) + mu_L + mu_R = alpha_R(i)/Res_igr(1, i) + mu_R end do - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & - (0.5_wp*(alpha_R(i)* & - vel_R(1))*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_R(1)*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - (0.5_wp*(alpha_R(i)* & - vel_R(1))*(1._wp/dx(j)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dx(j))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(1)*(1._wp/dx(j))) - end do - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & - (0.5_wp*(rho_R*(vel_R(1))**2.0 + & - pres_R)*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dx(j + 1))) + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & - (0.5_wp*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dx(j + 1))) + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & - (0.5_wp*(vel_R(1)*(E_R + & - pres_R))*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dx(j + 1))) + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*(rho_R*(vel_R(1))**2.0 + & - pres_R)*(1._wp/dx(j)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dx(j))) + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dx(j))) + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_R(1)*(E_R + & - pres_R))*(1._wp/dx(j)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dx(j))) + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j)) + end if + E_L = 0._wp; E_R = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j + q, k, l) end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - else - #:call GPU_PARALLEL_LOOP(collapse=3, private='[rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') - do l = 0, p - do k = 0, n - do j = -1, m - dvel = 0._wp - vflux_L_arr = 0._wp - vflux_R_arr = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j + q, k, l) + end do - #:if MFC_CASE_OPTIMIZATION - #:if igr_order == 5 - !DIR$ unroll 6 - #:elif igr_order == 3 - !DIR$ unroll 4 - #:endif - #:endif - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - dvel_small = 0._wp - !x-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + i + q, k, l) - end do - rho_sf_small(i) = rho_L - end do - - dvel_small(1) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 1)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) - dvel_small(3) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 2)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) - - if (q == 0) dvel(:, 1) = dvel_small - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) - vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(3)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(1))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) - vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(3)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(1))/3._wp - end if - - !y-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + q, k + i, l) - end do - rho_sf_small(i) = rho_L - end do - - dvel_small(1) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb)%sf(j + q, k + 1, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j + q, k - 1, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 1)%sf(j + q, k + 1, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j + q, k - 1, l)/rho_sf_small(-1)) - if (q == 0) dvel_small(3) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 2)%sf(j + q, k + 1, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j + q, k - 1, l)/rho_sf_small(-1)) - if (q == 0) dvel(:, 2) = dvel_small - - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(2))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(2))/3._wp - end if - - !z-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + q, k, l + i) - end do - rho_sf_small(i) = rho_L - end do - - dvel_small(1) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb)%sf(j + q, k, l + 1)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j + q, k, l - 1)/rho_sf_small(-1)) - if (q == 0) dvel_small(2) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb + 1)%sf(j + q, k, l + 1)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j + q, k, l - 1)/rho_sf_small(-1)) - dvel_small(3) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb + 2)%sf(j + q, k, l + 1)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j + q, k, l - 1)/rho_sf_small(-1)) - if (q == 0) dvel(:, 3) = dvel_small - - if (q > vidxb) then - vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(1)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(3))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(1)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(3))/3._wp - end if + call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & + E_R, gamma_R, pi_inf_R, rho_R, vel_R, & + pres_L, pres_R, cfl) - if (q == 0) then - jac_rhs(j, k, l) = alf_igr*(2._wp*(dvel(1, 2)*dvel(2, 1) & - + dvel(1, 3)*dvel(3, 1) & - + dvel(2, 3)*dvel(3, 2)) & - + dvel(1, 1)**2._wp + dvel(2, 2)**2._wp & - + dvel(3, 3)**2._wp & - + (dvel(1, 1) + dvel(2, 2) + dvel(3, 3))**2._wp) - end if - end do + do i = 1, num_fluids + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & + (0.5_wp*(alpha_rho_L(i)* & + vel_L(1))*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dx(j + 1))) - alpha_rho_L = 0._wp; alpha_rho_R = 0._wp - alpha_L = 0._wp; alpha_R = 0._wp - vel_L = 0._wp; vel_R = 0._wp + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + (0.5_wp*(alpha_rho_L(i)* & + vel_L(1))*(1._wp/dx(j)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dx(j))) + end do + if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j + q, k, l) - end do + do i = 1, num_fluids - 1 + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & + (0.5_wp*(alpha_L(i)* & + vel_L(1))*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dx(j + 1))) - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) - end do - else - alpha_L(1) = 1._wp - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_L(1)*(1._wp/dx(j + 1))) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + (0.5_wp*(alpha_L(i)* & + vel_L(1))*(1._wp/dx(j)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dx(j))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(1)*(1._wp/dx(j))) end do + end if - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j + q, k, l) - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & + (0.5_wp*(rho_L*(vel_L(1))**2.0 + & + pres_L)*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dx(j + 1))) - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) - end do - else - alpha_R(1) = 1._wp - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & + (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dx(j + 1))) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) - end do - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & + (0.5_wp*(vel_L(1)*(E_L + & + pres_L))*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dx(j + 1))) - if (num_fluids > 1) then - alpha_L(num_fluids) = 1._wp - sum(alpha_L(1:num_fluids - 1)) - alpha_R(num_fluids) = 1._wp - sum(alpha_R(1:num_fluids - 1)) - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + (0.5_wp*(rho_L*(vel_L(1))**2.0 + & + pres_L)*(1._wp/dx(j)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dx(j))) - rho_L = sum(alpha_rho_L) - gamma_L = sum(alpha_L*gammas) - pi_inf_L = sum(alpha_L*pi_infs) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dx(j))) - rho_R = sum(alpha_rho_R) - gamma_R = sum(alpha_R*gammas) - pi_inf_R = sum(alpha_R*pi_infs) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + (0.5_wp*(vel_L(1)*(E_L + & + pres_L))*(1._wp/dx(j)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dx(j))) - vel_L = vel_L/rho_L - vel_R = vel_R/rho_R + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & + (0.5_wp*(alpha_rho_R(i)* & + vel_R(1))*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dx(j + 1))) - if (viscous) then - mu_L = 0._wp - mu_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - mu_L = alpha_L(i)/Res_igr(1, i) + mu_L - mu_R = alpha_R(i)/Res_igr(1, i) + mu_R - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + (0.5_wp*(alpha_rho_R(i)* & + vel_R(1))*(1._wp/dx(j)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dx(j))) + end do + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j + 1)) + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & + (0.5_wp*(alpha_R(i)* & + vel_R(1))*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dx(j + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j)) + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_R(1)*(1._wp/dx(j + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j + 1)) + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + (0.5_wp*(alpha_R(i)* & + vel_R(1))*(1._wp/dx(j)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dx(j))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j)) + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(1)*(1._wp/dx(j))) + end do + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & + (0.5_wp*(rho_R*(vel_R(1))**2.0 + & + pres_R)*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dx(j + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & + (0.5_wp*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dx(j + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & + (0.5_wp*(vel_R(1)*(E_R + & + pres_R))*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dx(j + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + (0.5_wp*(rho_R*(vel_R(1))**2.0 + & + pres_R)*(1._wp/dx(j)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dx(j))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dx(j))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + (0.5_wp*(vel_R(1)*(E_R + & + pres_R))*(1._wp/dx(j)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dx(j))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j + 1)) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + else + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') + do l = 0, p + do k = 0, n + do j = -1, m - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j)) - end if + dvel = 0._wp + vflux_L_arr = 0._wp + vflux_R_arr = 0._wp + + #:if MFC_CASE_OPTIMIZATION + #:if igr_order == 5 + !DIR$ unroll 6 + #:elif igr_order == 3 + !DIR$ unroll 4 + #:endif + #:endif + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe + dvel_small = 0._wp + !x-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + i + q, k, l) + end do + rho_sf_small(i) = rho_L + end do - E_L = 0._wp; E_R = 0._wp + dvel_small(1) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 1)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) + dvel_small(3) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 2)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) + + if (q == 0) dvel(:, 1) = dvel_small + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) + vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(3)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(1))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) + vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(3)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(1))/3._wp + end if + !y-direction contributions $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j + q, k, l) + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + q, k + i, l) + end do + rho_sf_small(i) = rho_L end do + dvel_small(1) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb)%sf(j + q, k + 1, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j + q, k - 1, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 1)%sf(j + q, k + 1, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j + q, k - 1, l)/rho_sf_small(-1)) + if (q == 0) dvel_small(3) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 2)%sf(j + q, k + 1, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j + q, k - 1, l)/rho_sf_small(-1)) + if (q == 0) dvel(:, 2) = dvel_small + + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(2))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(2))/3._wp + end if + + !z-direction contributions $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j + q, k, l) + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + q, k, l + i) + end do + rho_sf_small(i) = rho_L end do - call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & - E_R, gamma_R, pi_inf_R, rho_R, vel_R, & - pres_L, pres_R, cfl) + dvel_small(1) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb)%sf(j + q, k, l + 1)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j + q, k, l - 1)/rho_sf_small(-1)) + if (q == 0) dvel_small(2) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb + 1)%sf(j + q, k, l + 1)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j + q, k, l - 1)/rho_sf_small(-1)) + dvel_small(3) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb + 2)%sf(j + q, k, l + 1)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j + q, k, l - 1)/rho_sf_small(-1)) + if (q == 0) dvel(:, 3) = dvel_small + + if (q > vidxb) then + vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(1)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(3))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(1)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(3))/3._wp + end if + if (q == 0) then + jac_rhs(j, k, l) = alf_igr*(2._wp*(dvel(1, 2)*dvel(2, 1) & + + dvel(1, 3)*dvel(3, 1) & + + dvel(2, 3)*dvel(3, 2)) & + + dvel(1, 1)**2._wp + dvel(2, 2)**2._wp & + + dvel(3, 3)**2._wp & + + (dvel(1, 1) + dvel(2, 2) + dvel(3, 3))**2._wp) + end if + end do + + alpha_rho_L = 0._wp; alpha_rho_R = 0._wp + alpha_L = 0._wp; alpha_R = 0._wp + vel_L = 0._wp; vel_R = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & - (0.5_wp*(alpha_rho_L(i)* & - vel_L(1))*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - (0.5_wp*(alpha_rho_L(i)* & - vel_L(1))*(1._wp/dx(j)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dx(j))) + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j + q, k, l) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & - (0.5_wp*(alpha_L(i)* & - vel_L(1))*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_L(1)*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - (0.5_wp*(alpha_L(i)* & - vel_L(1))*(1._wp/dx(j)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dx(j))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(1)*(1._wp/dx(j))) + alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) end do + else + alpha_L(1) = 1._wp end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & - (0.5_wp*(rho_L*(vel_L(1))**2.0 + & - pres_L)*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dx(j + 1))) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + end do + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & - (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dx(j + 1))) + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j + q, k, l) + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) + & - (0.5_wp*rho_L*vel_L(1)*vel_L(3)*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dx(j + 1))) + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) + end do + else + alpha_R(1) = 1._wp + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & - (0.5_wp*(vel_L(1)*(E_L + & - pres_L))*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dx(j + 1))) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + end do + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*(rho_L*(vel_L(1))**2.0 + & - pres_L)*(1._wp/dx(j)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dx(j))) + if (num_fluids > 1) then + alpha_L(num_fluids) = 1._wp - sum(alpha_L(1:num_fluids - 1)) + alpha_R(num_fluids) = 1._wp - sum(alpha_R(1:num_fluids - 1)) + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dx(j))) + rho_L = sum(alpha_rho_L) + gamma_L = sum(alpha_L*gammas) + pi_inf_L = sum(alpha_L*pi_infs) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & - (0.5_wp*rho_L*vel_L(1)*vel_L(3)*(1._wp/dx(j)) - & - 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dx(j))) + rho_R = sum(alpha_rho_R) + gamma_R = sum(alpha_R*gammas) + pi_inf_R = sum(alpha_R*pi_infs) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_L(1)*(E_L + & - pres_L))*(1._wp/dx(j)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dx(j))) + vel_L = vel_L/rho_L + vel_R = vel_R/rho_R + if (viscous) then + mu_L = 0._wp + mu_R = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & - (0.5_wp*(alpha_rho_R(i)* & - vel_R(1))*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - (0.5_wp*(alpha_rho_R(i)* & - vel_R(1))*(1._wp/dx(j)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dx(j))) + mu_L = alpha_L(i)/Res_igr(1, i) + mu_L + mu_R = alpha_R(i)/Res_igr(1, i) + mu_R end do - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & - (0.5_wp*(alpha_R(i)* & - vel_R(1))*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_R(1)*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - (0.5_wp*(alpha_R(i)* & - vel_R(1))*(1._wp/dx(j)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dx(j))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(1)*(1._wp/dx(j))) - end do - end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & - (0.5_wp*(rho_R*(vel_R(1))**2.0 + & - pres_R)*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dx(j + 1))) - + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dx(j + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & - (0.5_wp*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dx(j + 1))) + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) + & - (0.5_wp*rho_R*vel_R(1)*vel_R(3)*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dx(j + 1))) - + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dx(j)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & - (0.5_wp*(vel_R(1)*(E_R + & - pres_R))*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dx(j + 1))) + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*(rho_R*(vel_R(1))**2.0 + & - pres_R)*(1._wp/dx(j)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dx(j))) - + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dx(j + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dx(j))) + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & - (0.5_wp*rho_R*vel_R(1)*vel_R(3)*(1._wp/dx(j)) + & - 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dx(j))) - + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dx(j)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_R(1)*(E_R + & - pres_R))*(1._wp/dx(j)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dx(j))) - - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - end if - else if (idir == 2) then - if (p == 0) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') - do l = 0, p - do k = -1, n - do j = 0, m - - if (viscous) then - vflux_L_arr = 0._wp - vflux_R_arr = 0._wp - - #:if MFC_CASE_OPTIMIZATION - #:if igr_order == 5 - !DIR$ unroll 6 - #:elif igr_order == 3 - !DIR$ unroll 4 - #:endif - #:endif - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - dvel_small = 0._wp - !x-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + i, k + q, l) - end do - rho_sf_small(i) = rho_L - end do - - dvel_small(1) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb)%sf(j + 1, k + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j - 1, k + q, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 1)%sf(j + 1, k + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j - 1, k + q, l)/rho_sf_small(-1)) - - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(1))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(1))/3._wp - end if - - !y-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j, k + i + q, l) - end do - rho_sf_small(i) = rho_L - end do - - dvel_small(1) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 1)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) - - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(2))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(2))/3._wp - end if - end do - end if - - alpha_rho_L = 0._wp; alpha_rho_R = 0._wp - alpha_L = 0._wp; alpha_R = 0._wp - vel_L = 0._wp; vel_R = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j, k + q, l) - end do + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j)) - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) - end do - else - alpha_L(1) = 1._wp - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dx(j + 1)) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) - end do - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dx(j)) - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j, k + q, l) - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dx(j + 1)) - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) - end do - else - alpha_R(1) = 1._wp - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dx(j)) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) - end do - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j + 1)) - if (num_fluids > 1) then - alpha_L(num_fluids) = 1._wp - sum(alpha_L(1:num_fluids - 1)) - alpha_R(num_fluids) = 1._wp - sum(alpha_R(1:num_fluids - 1)) - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j)) - rho_L = sum(alpha_rho_L) - gamma_L = sum(alpha_L*gammas) - pi_inf_L = sum(alpha_L*pi_infs) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j + 1)) - rho_R = sum(alpha_rho_R) - gamma_R = sum(alpha_R*gammas) - pi_inf_R = sum(alpha_R*pi_infs) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j)) + end if - vel_L = vel_L/rho_L - vel_R = vel_R/rho_R + E_L = 0._wp; E_R = 0._wp - if (viscous) then - mu_L = 0._wp - mu_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - mu_L = alpha_L(i)/Res_igr(1, i) + mu_L - mu_R = alpha_R(i)/Res_igr(1, i) + mu_R - end do + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j + q, k, l) + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k + 1)) + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j + q, k, l) + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k)) + call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & + E_R, gamma_R, pi_inf_R, rho_R, vel_R, & + pres_L, pres_R, cfl) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k + 1)) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & + (0.5_wp*(alpha_rho_L(i)* & + vel_L(1))*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dx(j + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + (0.5_wp*(alpha_rho_L(i)* & + vel_L(1))*(1._wp/dx(j)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dx(j))) + end do + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k + 1)) + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & + (0.5_wp*(alpha_L(i)* & + vel_L(1))*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dx(j + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k)) + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_L(1)*(1._wp/dx(j + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k + 1)) + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + (0.5_wp*(alpha_L(i)* & + vel_L(1))*(1._wp/dx(j)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dx(j))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k)) - end if - - E_L = 0._wp; E_R = 0._wp - F_L = 0._wp; F_R = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j, k + q, l) - F_L = F_L + coeff_L(q)*jac(j, k + q, l) + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(1)*(1._wp/dx(j))) end do + end if - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j, k + q, l) - F_R = F_R + coeff_R(q)*jac(j, k + q, l) - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & + (0.5_wp*(rho_L*(vel_L(1))**2.0 + & + pres_L)*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dx(j + 1))) - call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & - E_R, gamma_R, pi_inf_R, rho_R, vel_R, & - pres_L, pres_R, cfl) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & + (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dx(j + 1))) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & - (0.5_wp*(alpha_rho_L(i)* & - vel_L(2))*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) + & + (0.5_wp*rho_L*vel_L(1)*vel_L(3)*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dx(j + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - (0.5_wp*(alpha_rho_L(i)* & - vel_L(2))*(1._wp/dy(k)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dy(k))) - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & + (0.5_wp*(vel_L(1)*(E_L + & + pres_L))*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dx(j + 1))) - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & - (0.5_wp*(alpha_L(i)* & - vel_L(2))*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dy(k + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_L(2)*(1._wp/dy(k + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - (0.5_wp*(alpha_L(i)* & - vel_L(2))*(1._wp/dy(k)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dy(k))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + (0.5_wp*(rho_L*(vel_L(1))**2.0 + & + pres_L)*(1._wp/dx(j)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dx(j))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(2)*(1._wp/dy(k))) - end do - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dx(j))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & - (0.5_wp*(rho_L*(vel_L(2))**2.0 + & - pres_L + F_L)*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + (0.5_wp*rho_L*vel_L(1)*vel_L(3)*(1._wp/dx(j)) - & + 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dx(j))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & - (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + (0.5_wp*(vel_L(1)*(E_L + & + pres_L))*(1._wp/dx(j)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dx(j))) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & - (0.5_wp*(vel_L(2)*(E_L + & - pres_L + F_L))*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dy(k + 1))) + rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & + (0.5_wp*(alpha_rho_R(i)* & + vel_R(1))*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dx(j + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*(rho_L*(vel_L(2))**2.0 + & - pres_L + F_L)*(1._wp/dy(k)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dy(k))) + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + (0.5_wp*(alpha_rho_R(i)* & + vel_R(1))*(1._wp/dx(j)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dx(j))) + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dy(k))) + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & + (0.5_wp*(alpha_R(i)* & + vel_R(1))*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dx(j + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_L(2)*(E_L + & - pres_L + F_L))*(1._wp/dy(k)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dy(k))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_R(1)*(1._wp/dx(j + 1))) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & - (0.5_wp*(alpha_rho_R(i)* & - vel_R(2))*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dy(k + 1))) + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + (0.5_wp*(alpha_R(i)* & + vel_R(1))*(1._wp/dx(j)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dx(j))) + $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - (0.5_wp*(alpha_rho_R(i)* & - vel_R(2))*(1._wp/dy(k)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dy(k))) + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(1)*(1._wp/dx(j))) end do + end if - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & - (0.5_wp*(alpha_R(i)* & - vel_R(2))*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dy(k + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_R(2)*(1._wp/dy(k + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - (0.5_wp*(alpha_R(i)* & - vel_R(2))*(1._wp/dy(k)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dy(k))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & + (0.5_wp*(rho_R*(vel_R(1))**2.0 + & + pres_R)*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dx(j + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(2)*(1._wp/dy(k))) - end do - end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & - (0.5_wp*(rho_R*(vel_R(2))**2.0 + & - pres_R + F_R)*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dy(k + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & - (0.5_wp*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dy(k + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & - (0.5_wp*(vel_R(2)*(E_R + & - pres_R + F_R))*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dy(k + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*(rho_R*(vel_R(2))**2.0 + & - pres_R + F_R)*(1._wp/dy(k)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dy(k))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dy(k))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_R(2)*(E_R + & - pres_R + F_R))*(1._wp/dy(k)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dy(k))) - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - else - #:call GPU_PARALLEL_LOOP(collapse=3, private='[rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') - do l = 0, p - do k = -1, n - do j = 0, m - - if (viscous) then - vflux_L_arr = 0._wp - vflux_R_arr = 0._wp - - #:if MFC_CASE_OPTIMIZATION - #:if igr_order == 5 - !DIR$ unroll 6 - #:elif igr_order == 3 - !DIR$ unroll 4 - #:endif - #:endif - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - dvel_small = 0._wp - !x-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + i, k + q, l) - end do - rho_sf_small(i) = rho_L - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & + (0.5_wp*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dx(j + 1))) - dvel_small(1) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb)%sf(j + 1, k + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j - 1, k + q, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 1)%sf(j + 1, k + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j - 1, k + q, l)/rho_sf_small(-1)) - - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(1))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(1))/3._wp - end if - - !y-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j, k + i + q, l) - end do - rho_sf_small(i) = rho_L - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) + & + (0.5_wp*rho_R*vel_R(1)*vel_R(3)*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dx(j + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & + (0.5_wp*(vel_R(1)*(E_R + & + pres_R))*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dx(j + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + (0.5_wp*(rho_R*(vel_R(1))**2.0 + & + pres_R)*(1._wp/dx(j)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dx(j))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dx(j))) - dvel_small(1) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 1)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) - dvel_small(3) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 2)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) - - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) - vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(3)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(2))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) - vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(3)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(2))/3._wp - end if - - !z-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j, k + q, l + i) - end do - rho_sf_small(i) = rho_L - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + (0.5_wp*rho_R*vel_R(1)*vel_R(3)*(1._wp/dx(j)) + & + 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dx(j))) - dvel_small(2) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb + 1)%sf(j, k + q, l + 1)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j, k + q, l - 1)/rho_sf_small(-1)) - dvel_small(3) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb + 2)%sf(j, k + q, l + 1)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j, k + q, l - 1)/rho_sf_small(-1)) - if (q > vidxb) then - vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(3))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(3))/3._wp - end if - end do - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + (0.5_wp*(vel_R(1)*(E_R + & + pres_R))*(1._wp/dx(j)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dx(j))) + + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + else if (idir == 2) then + if (p == 0) then + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') + do l = 0, p + do k = -1, n + do j = 0, m - alpha_rho_L = 0._wp; alpha_rho_R = 0._wp - alpha_L = 0._wp; alpha_R = 0._wp - vel_L = 0._wp; vel_R = 0._wp + if (viscous) then + vflux_L_arr = 0._wp + vflux_R_arr = 0._wp + #:if MFC_CASE_OPTIMIZATION + #:if igr_order == 5 + !DIR$ unroll 6 + #:elif igr_order == 3 + !DIR$ unroll 4 + #:endif + #:endif $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe + do q = vidxb, vidxe + dvel_small = 0._wp + !x-direction contributions $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j, k + q, l) - end do - - if (num_fluids > 1) then + do i = -1, 1 + rho_L = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + i, k + q, l) end do - else - alpha_L(1) = 1._wp - end if - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) + rho_sf_small(i) = rho_L end do - end do - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j, k + q, l) - end do + dvel_small(1) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb)%sf(j + 1, k + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j - 1, k + q, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 1)%sf(j + 1, k + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j - 1, k + q, l)/rho_sf_small(-1)) - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) - end do - else - alpha_R(1) = 1._wp + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(1))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(1))/3._wp end if + !y-direction contributions $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j, k + i + q, l) + end do + rho_sf_small(i) = rho_L end do - end do - if (num_fluids > 1) then - alpha_L(num_fluids) = 1._wp - sum(alpha_L(1:num_fluids - 1)) - alpha_R(num_fluids) = 1._wp - sum(alpha_R(1:num_fluids - 1)) - end if + dvel_small(1) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 1)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) - rho_L = sum(alpha_rho_L) - gamma_L = sum(alpha_L*gammas) - pi_inf_L = sum(alpha_L*pi_infs) + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(2))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(2))/3._wp + end if + end do + end if - rho_R = sum(alpha_rho_R) - gamma_R = sum(alpha_R*gammas) - pi_inf_R = sum(alpha_R*pi_infs) + alpha_rho_L = 0._wp; alpha_rho_R = 0._wp + alpha_L = 0._wp; alpha_R = 0._wp + vel_L = 0._wp; vel_R = 0._wp - vel_L = vel_L/rho_L - vel_R = vel_R/rho_R + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j, k + q, l) + end do - if (viscous) then - mu_L = 0._wp - mu_R = 0._wp + if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - mu_L = alpha_L(i)/Res_igr(1, i) + mu_L - mu_R = alpha_R(i)/Res_igr(1, i) + mu_R + do i = 1, num_fluids - 1 + alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) end do - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dy(k + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dy(k)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dy(k + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dy(k)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k)) + else + alpha_L(1) = 1._wp end if - E_L = 0._wp; E_R = 0._wp - F_L = 0._wp; F_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) + end do + end do + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j, k + q, l) - F_L = F_L + coeff_L(q)*jac(j, k + q, l) + do i = 1, num_fluids + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j, k + q, l) end do + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) + end do + else + alpha_R(1) = 1._wp + end if + $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j, k + q, l) - F_R = F_R + coeff_R(q)*jac(j, k + q, l) + do i = 1, num_dims + vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) end do + end do + + if (num_fluids > 1) then + alpha_L(num_fluids) = 1._wp - sum(alpha_L(1:num_fluids - 1)) + alpha_R(num_fluids) = 1._wp - sum(alpha_R(1:num_fluids - 1)) + end if - call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & - E_R, gamma_R, pi_inf_R, rho_R, vel_R, & - pres_L, pres_R, cfl) + rho_L = sum(alpha_rho_L) + gamma_L = sum(alpha_L*gammas) + pi_inf_L = sum(alpha_L*pi_infs) + + rho_R = sum(alpha_rho_R) + gamma_R = sum(alpha_R*gammas) + pi_inf_R = sum(alpha_R*pi_infs) + + vel_L = vel_L/rho_L + vel_R = vel_R/rho_R + if (viscous) then + mu_L = 0._wp + mu_R = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & - (0.5_wp*(alpha_rho_L(i)* & - vel_L(2))*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dy(k + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - (0.5_wp*(alpha_rho_L(i)* & - vel_L(2))*(1._wp/dy(k)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dy(k))) + mu_L = alpha_L(i)/Res_igr(1, i) + mu_L + mu_R = alpha_R(i)/Res_igr(1, i) + mu_R end do - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & - (0.5_wp*(alpha_L(i)* & - vel_L(2))*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dy(k + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_L(2)*(1._wp/dy(k + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - (0.5_wp*(alpha_L(i)* & - vel_L(2))*(1._wp/dy(k)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dy(k))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dy(k + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(2)*(1._wp/dy(k))) - end do - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dy(k)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & - (0.5_wp*(rho_L*(vel_L(2))**2.0 + & - pres_L + F_L)*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dy(k + 1))) + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dy(k + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & - (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dy(k + 1))) + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dy(k)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) + & - (0.5_wp*rho_L*vel_L(3)*vel_L(2)*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dy(k + 1))) + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dy(k + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & - (0.5_wp*(vel_L(2)*(E_L + & - pres_L + F_L))*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dy(k + 1))) + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dy(k)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*(rho_L*(vel_L(2))**2.0 + & - pres_L + F_L)*(1._wp/dy(k)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dy(k))) + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dy(k + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dy(k))) + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dy(k)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k)) + end if + + E_L = 0._wp; E_R = 0._wp + F_L = 0._wp; F_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j, k + q, l) + F_L = F_L + coeff_L(q)*jac(j, k + q, l) + end do + + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j, k + q, l) + F_R = F_R + coeff_R(q)*jac(j, k + q, l) + end do + + call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & + E_R, gamma_R, pi_inf_R, rho_R, vel_R, & + pres_L, pres_R, cfl) + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & - (0.5_wp*rho_L*vel_L(3)*vel_L(2)*(1._wp/dy(k)) - & - 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dy(k))) + rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & + (0.5_wp*(alpha_rho_L(i)* & + vel_L(2))*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_L(2)*(E_L + & - pres_L + F_L))*(1._wp/dy(k)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dy(k))) + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + (0.5_wp*(alpha_rho_L(i)* & + vel_L(2))*(1._wp/dy(k)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dy(k))) + end do + if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids + do i = 1, num_fluids - 1 $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & - (0.5_wp*(alpha_rho_R(i)* & - vel_R(2))*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dy(k + 1))) + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & + (0.5_wp*(alpha_L(i)* & + vel_L(2))*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - (0.5_wp*(alpha_rho_R(i)* & - vel_R(2))*(1._wp/dy(k)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dy(k))) + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_L(2)*(1._wp/dy(k + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + (0.5_wp*(alpha_L(i)* & + vel_L(2))*(1._wp/dy(k)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dy(k))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(2)*(1._wp/dy(k))) end do + end if - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & - (0.5_wp*(alpha_R(i)* & - vel_R(2))*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dy(k + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_R(2)*(1._wp/dy(k + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - (0.5_wp*(alpha_R(i)* & - vel_R(2))*(1._wp/dy(k)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dy(k))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & + (0.5_wp*(rho_L*(vel_L(2))**2.0 + & + pres_L + F_L)*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dy(k + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(2)*(1._wp/dy(k))) - end do - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & + (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dy(k + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & - (0.5_wp*(rho_R*(vel_R(2))**2.0 + & - pres_R + F_R)*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & + (0.5_wp*(vel_L(2)*(E_L + & + pres_L + F_L))*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dy(k + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & - (0.5_wp*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*(rho_L*(vel_L(2))**2.0 + & + pres_L + F_L)*(1._wp/dy(k)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dy(k))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) + & - (0.5_wp*rho_R*vel_R(2)*vel_R(3)*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dy(k))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & - (0.5_wp*(vel_R(2)*(E_R + & - pres_R + F_R))*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + (0.5_wp*(vel_L(2)*(E_L + & + pres_L + F_L))*(1._wp/dy(k)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dy(k))) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*(rho_R*(vel_R(2))**2.0 + & - pres_R + F_R)*(1._wp/dy(k)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dy(k))) + rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & + (0.5_wp*(alpha_rho_R(i)* & + vel_R(2))*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + (0.5_wp*(alpha_rho_R(i)* & + vel_R(2))*(1._wp/dy(k)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dy(k))) + end do + + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & + (0.5_wp*(alpha_R(i)* & + vel_R(2))*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dy(k + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dy(k))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_R(2)*(1._wp/dy(k + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & - (0.5_wp*rho_R*vel_R(2)*vel_R(3)*(1._wp/dy(k)) + & - 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dy(k))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + (0.5_wp*(alpha_R(i)* & + vel_R(2))*(1._wp/dy(k)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dy(k))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(2)*(1._wp/dy(k))) + end do + end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & + (0.5_wp*(rho_R*(vel_R(2))**2.0 + & + pres_R + F_R)*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & + (0.5_wp*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & (0.5_wp*(vel_R(2)*(E_R + & - pres_R + F_R))*(1._wp/dy(k)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dy(k))) - - end do + pres_R + F_R))*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*(rho_R*(vel_R(2))**2.0 + & + pres_R + F_R)*(1._wp/dy(k)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dy(k))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + (0.5_wp*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dy(k))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + (0.5_wp*(vel_R(2)*(E_R + & + pres_R + F_R))*(1._wp/dy(k)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dy(k))) end do end do - #:endcall GPU_PARALLEL_LOOP - end if - elseif (idir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') - do l = -1, p - do k = 0, n + end do + $:END_GPU_PARALLEL_LOOP() + else + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') + do l = 0, p + do k = -1, n do j = 0, m if (viscous) then @@ -2123,24 +1666,24 @@ contains rho_L = 0._wp $:GPU_LOOP(parallelism='[seq]') do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + i, k, l + q) + rho_L = rho_L + q_cons_vf(r)%sf(j + i, k + q, l) end do rho_sf_small(i) = rho_L end do dvel_small(1) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb)%sf(j + 1, k, l + q)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j - 1, k, l + q)/rho_sf_small(-1)) - dvel_small(3) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 2)%sf(j + 1, k, l + q)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j - 1, k, l + q)/rho_sf_small(-1)) + q_cons_vf(momxb)%sf(j + 1, k + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j - 1, k + q, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 1)%sf(j + 1, k + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j - 1, k + q, l)/rho_sf_small(-1)) if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(3)) + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(1))/3._wp end if if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(3)) + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(1))/3._wp end if @@ -2150,25 +1693,30 @@ contains rho_L = 0._wp $:GPU_LOOP(parallelism='[seq]') do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j, k + i, l + q) + rho_L = rho_L + q_cons_vf(r)%sf(j, k + i + q, l) end do rho_sf_small(i) = rho_L end do + dvel_small(1) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) dvel_small(2) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 1)%sf(j, k + 1, l + q)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j, k - 1, l + q)/rho_sf_small(-1)) + q_cons_vf(momxb + 1)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) dvel_small(3) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 2)%sf(j, k + 1, l + q)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j, k - 1, l + q)/rho_sf_small(-1)) + q_cons_vf(momxb + 2)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(3)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(2))/3._wp + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(2))/3._wp end if if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(3)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(2))/3._wp + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(2))/3._wp end if !z-direction contributions @@ -2177,28 +1725,24 @@ contains rho_L = 0._wp $:GPU_LOOP(parallelism='[seq]') do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j, k, l + i + q) + rho_L = rho_L + q_cons_vf(r)%sf(j, k + q, l + i) end do rho_sf_small(i) = rho_L end do - dvel_small(1) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb + 1)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) + q_cons_vf(momxb + 1)%sf(j, k + q, l + 1)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j, k + q, l - 1)/rho_sf_small(-1)) dvel_small(3) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb + 2)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) + q_cons_vf(momxb + 2)%sf(j, k + q, l + 1)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j, k + q, l - 1)/rho_sf_small(-1)) if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(3))/3._wp + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(3))/3._wp end if if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(3))/3._wp + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(3))/3._wp end if end do end if @@ -2211,13 +1755,13 @@ contains do q = vidxb + 1, vidxe $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j, k, l + q) + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j, k + q, l) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j, k, l + q) + alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) end do else alpha_L(1) = 1._wp @@ -2225,7 +1769,7 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j, k, l + q) + vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) end do end do @@ -2233,13 +1777,13 @@ contains do q = vidxb, vidxe - 1 $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j, k, l + q) + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j, k + q, l) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j, k, l + q) + alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) end do else alpha_R(1) = 1._wp @@ -2247,7 +1791,7 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j, k, l + q) + vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) end do end do @@ -2277,88 +1821,88 @@ contains end do $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) - & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dz(l + 1)) + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dz(l + 1)) + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dz(l)) + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dz(l)) + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) - & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dz(l + 1)) + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dz(l + 1)) + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dz(l)) + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dz(l)) + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) - & - 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dz(l + 1)) + rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & - 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(2)*(1._wp/dz(l + 1)) + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dz(l)) + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(2)*(1._wp/dz(l)) + 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) - & - 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dz(l + 1)) + rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & - 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(2)*(1._wp/dz(l + 1)) + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dz(l)) + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(2)*(1._wp/dz(l)) + 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) - & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dz(l + 1)) + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(3)*(1._wp/dz(l + 1)) + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dz(l)) + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(3)*(1._wp/dz(l)) + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) - & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dz(l + 1)) + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(3)*(1._wp/dz(l + 1)) + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dz(l)) + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(3)*(1._wp/dz(l)) + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k)) end if E_L = 0._wp; E_R = 0._wp @@ -2366,14 +1910,14 @@ contains $:GPU_LOOP(parallelism='[seq]') do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j, k, l + q) - F_L = F_L + coeff_L(q)*jac(j, k, l + q) + E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j, k + q, l) + F_L = F_L + coeff_L(q)*jac(j, k + q, l) end do $:GPU_LOOP(parallelism='[seq]') do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j, k, l + q) - F_R = F_R + coeff_R(q)*jac(j, k, l + q) + E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j, k + q, l) + F_R = F_R + coeff_R(q)*jac(j, k + q, l) end do call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & @@ -2383,175 +1927,631 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l + 1) = rhs_vf(i)%sf(j, k, l + 1) + & + rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & (0.5_wp*(alpha_rho_L(i)* & - vel_L(3))*(1._wp/dz(l + 1)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dz(l + 1))) + vel_L(2))*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & (0.5_wp*(alpha_rho_L(i)* & - vel_L(3))*(1._wp/dz(l)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dz(l))) + vel_L(2))*(1._wp/dy(k)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dy(k))) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) + & + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & (0.5_wp*(alpha_L(i)* & - vel_L(3))*(1._wp/dz(l + 1)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dz(l + 1))) + vel_L(2))*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l + 1)*vel_L(3)*(1._wp/dz(l + 1))) + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_L(2)*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & (0.5_wp*(alpha_L(i)* & - vel_L(3))*(1._wp/dz(l)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dz(l))) + vel_L(2))*(1._wp/dy(k)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dy(k))) $:GPU_ATOMIC(atomic='update') rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(3)*(1._wp/dz(l))) + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(2)*(1._wp/dy(k))) end do end if $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) + & - (0.5_wp*(rho_L*(vel_L(3))**2.0 + & - pres_L + F_L)*(1._wp/dz(l + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dz(l + 1))) + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & + (0.5_wp*(rho_L*(vel_L(2))**2.0 + & + pres_L + F_L)*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) + & - (0.5_wp*rho_L*vel_L(1)*vel_L(3)*(1._wp/dz(l + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dz(l + 1))) + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & + (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) + & - (0.5_wp*rho_L*vel_L(2)*vel_L(3)*(1._wp/dz(l + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dz(l + 1))) + rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) + & + (0.5_wp*rho_L*vel_L(3)*vel_L(2)*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) + & - (0.5_wp*(vel_L(3)*(E_L + & - pres_L + F_L))*(1._wp/dz(l + 1)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dz(l + 1))) + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & + (0.5_wp*(vel_L(2)*(E_L + & + pres_L + F_L))*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & - (0.5_wp*(rho_L*(vel_L(3))**2.0 + & - pres_L + F_L)*(1._wp/dz(l)) - & - 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dz(l))) + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*(rho_L*(vel_L(2))**2.0 + & + pres_L + F_L)*(1._wp/dy(k)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dy(k))) $:GPU_ATOMIC(atomic='update') rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*rho_L*vel_L(1)*vel_L(3)*(1._wp/dz(l)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dz(l))) + (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dy(k))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*rho_L*vel_L(2)*vel_L(3)*(1._wp/dz(l)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dz(l))) + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + (0.5_wp*rho_L*vel_L(3)*vel_L(2)*(1._wp/dy(k)) - & + 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dy(k))) $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_L(3)*(E_L + & - pres_L + F_L))*(1._wp/dz(l)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dz(l))) + (0.5_wp*(vel_L(2)*(E_L + & + pres_L + F_L))*(1._wp/dy(k)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dy(k))) $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l + 1) = rhs_vf(i)%sf(j, k, l + 1) + & + rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & (0.5_wp*(alpha_rho_R(i)* & - vel_R(3))*(1._wp/dz(l + 1)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dz(l + 1))) + vel_R(2))*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & (0.5_wp*(alpha_rho_R(i)* & - vel_R(3))*(1._wp/dz(l)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dz(l))) + vel_R(2))*(1._wp/dy(k)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dy(k))) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) + & + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & (0.5_wp*(alpha_R(i)* & - vel_R(3))*(1._wp/dz(l + 1)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dz(l + 1))) + vel_R(2))*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l + 1)*vel_R(3)*(1._wp/dz(l + 1))) + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_R(2)*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & (0.5_wp*(alpha_R(i)* & - vel_R(3))*(1._wp/dz(l)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dz(l))) + vel_R(2))*(1._wp/dy(k)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dy(k))) $:GPU_ATOMIC(atomic='update') rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(3)*(1._wp/dz(l))) + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(2)*(1._wp/dy(k))) end do end if $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) + & - (0.5_wp*(rho_R*(vel_R(3))**2.0 + & - pres_R + F_R)*(1._wp/dz(l + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dz(l + 1))) + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & + (0.5_wp*(rho_R*(vel_R(2))**2.0 + & + pres_R + F_R)*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) + & - (0.5_wp*rho_R*vel_R(1)*vel_R(3)*(1._wp/dz(l + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dz(l + 1))) + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & + (0.5_wp*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) + & - (0.5_wp*rho_R*vel_R(2)*vel_R(3)*(1._wp/dz(l + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dz(l + 1))) + rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) + & + (0.5_wp*rho_R*vel_R(2)*vel_R(3)*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) + & - (0.5_wp*(vel_R(3)*(E_R + & - pres_R + F_R))*(1._wp/dz(l + 1)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dz(l + 1))) + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & + (0.5_wp*(vel_R(2)*(E_R + & + pres_R + F_R))*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & - (0.5_wp*(rho_R*(vel_R(3))**2.0 + & - pres_R + F_R)*(1._wp/dz(l)) + & - 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dz(l))) + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*(rho_R*(vel_R(2))**2.0 + & + pres_R + F_R)*(1._wp/dy(k)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dy(k))) $:GPU_ATOMIC(atomic='update') rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*rho_R*vel_R(1)*vel_R(3)*(1._wp/dz(l)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dz(l))) + (0.5_wp*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dy(k))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*rho_R*vel_R(2)*vel_R(3)*(1._wp/dz(l)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dz(l))) + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + (0.5_wp*rho_R*vel_R(2)*vel_R(3)*(1._wp/dy(k)) + & + 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dy(k))) $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_R(3)*(E_R + & - pres_R + F_R))*(1._wp/dz(l)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dz(l))) + (0.5_wp*(vel_R(2)*(E_R + & + pres_R + F_R))*(1._wp/dy(k)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dy(k))) + + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + elseif (idir == 3) then + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') + do l = -1, p + do k = 0, n + do j = 0, m + + if (viscous) then + vflux_L_arr = 0._wp + vflux_R_arr = 0._wp + + #:if MFC_CASE_OPTIMIZATION + #:if igr_order == 5 + !DIR$ unroll 6 + #:elif igr_order == 3 + !DIR$ unroll 4 + #:endif + #:endif + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe + dvel_small = 0._wp + !x-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + i, k, l + q) + end do + rho_sf_small(i) = rho_L + end do + + dvel_small(1) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb)%sf(j + 1, k, l + q)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j - 1, k, l + q)/rho_sf_small(-1)) + dvel_small(3) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 2)%sf(j + 1, k, l + q)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j - 1, k, l + q)/rho_sf_small(-1)) + + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(3)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(1))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(3)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(1))/3._wp + end if + + !y-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j, k + i, l + q) + end do + rho_sf_small(i) = rho_L + end do + + dvel_small(2) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 1)%sf(j, k + 1, l + q)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j, k - 1, l + q)/rho_sf_small(-1)) + dvel_small(3) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 2)%sf(j, k + 1, l + q)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j, k - 1, l + q)/rho_sf_small(-1)) + + if (q > vidxb) then + vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(3)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(2))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(3)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(2))/3._wp + end if + + !z-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j, k, l + i + q) + end do + rho_sf_small(i) = rho_L + end do + dvel_small(1) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb + 1)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) + dvel_small(3) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb + 2)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) + vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(2)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(3))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) + vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(2)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(3))/3._wp + end if + end do + end if + + alpha_rho_L = 0._wp; alpha_rho_R = 0._wp + alpha_L = 0._wp; alpha_R = 0._wp + vel_L = 0._wp; vel_R = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j, k, l + q) + end do + + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j, k, l + q) + end do + else + alpha_L(1) = 1._wp + end if + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j, k, l + q) + end do + end do + + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j, k, l + q) + end do + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j, k, l + q) + end do + else + alpha_R(1) = 1._wp + end if + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j, k, l + q) + end do + end do + + if (num_fluids > 1) then + alpha_L(num_fluids) = 1._wp - sum(alpha_L(1:num_fluids - 1)) + alpha_R(num_fluids) = 1._wp - sum(alpha_R(1:num_fluids - 1)) + end if + + rho_L = sum(alpha_rho_L) + gamma_L = sum(alpha_L*gammas) + pi_inf_L = sum(alpha_L*pi_infs) + + rho_R = sum(alpha_rho_R) + gamma_R = sum(alpha_R*gammas) + pi_inf_R = sum(alpha_R*pi_infs) + + vel_L = vel_L/rho_L + vel_R = vel_R/rho_R + + if (viscous) then + mu_L = 0._wp + mu_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + mu_L = alpha_L(i)/Res_igr(1, i) + mu_L + mu_R = alpha_R(i)/Res_igr(1, i) + mu_R + end do + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) - & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dz(l + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dz(l + 1)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dz(l)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dz(l)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) - & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dz(l + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dz(l + 1)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dz(l)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dz(l)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) - & + 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dz(l + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & + 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(2)*(1._wp/dz(l + 1)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dz(l)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(2)*(1._wp/dz(l)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) - & + 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dz(l + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & + 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(2)*(1._wp/dz(l + 1)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dz(l)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(2)*(1._wp/dz(l)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) - & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dz(l + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(3)*(1._wp/dz(l + 1)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dz(l)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(3)*(1._wp/dz(l)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) - & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dz(l + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(3)*(1._wp/dz(l + 1)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dz(l)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(3)*(1._wp/dz(l)) + end if + + E_L = 0._wp; E_R = 0._wp + F_L = 0._wp; F_R = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j, k, l + q) + F_L = F_L + coeff_L(q)*jac(j, k, l + q) + end do + + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j, k, l + q) + F_R = F_R + coeff_R(q)*jac(j, k, l + q) + end do + + call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & + E_R, gamma_R, pi_inf_R, rho_R, vel_R, & + pres_L, pres_R, cfl) + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l + 1) = rhs_vf(i)%sf(j, k, l + 1) + & + (0.5_wp*(alpha_rho_L(i)* & + vel_L(3))*(1._wp/dz(l + 1)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + (0.5_wp*(alpha_rho_L(i)* & + vel_L(3))*(1._wp/dz(l)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dz(l))) + end do + + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) + & + (0.5_wp*(alpha_L(i)* & + vel_L(3))*(1._wp/dz(l + 1)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l + 1)*vel_L(3)*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + (0.5_wp*(alpha_L(i)* & + vel_L(3))*(1._wp/dz(l)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dz(l))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(3)*(1._wp/dz(l))) + end do + end if + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) + & + (0.5_wp*(rho_L*(vel_L(3))**2.0 + & + pres_L + F_L)*(1._wp/dz(l + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) + & + (0.5_wp*rho_L*vel_L(1)*vel_L(3)*(1._wp/dz(l + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) + & + (0.5_wp*rho_L*vel_L(2)*vel_L(3)*(1._wp/dz(l + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) + & + (0.5_wp*(vel_L(3)*(E_L + & + pres_L + F_L))*(1._wp/dz(l + 1)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + (0.5_wp*(rho_L*(vel_L(3))**2.0 + & + pres_L + F_L)*(1._wp/dz(l)) - & + 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dz(l))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + (0.5_wp*rho_L*vel_L(1)*vel_L(3)*(1._wp/dz(l)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dz(l))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*rho_L*vel_L(2)*vel_L(3)*(1._wp/dz(l)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dz(l))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + (0.5_wp*(vel_L(3)*(E_L + & + pres_L + F_L))*(1._wp/dz(l)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dz(l))) + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l + 1) = rhs_vf(i)%sf(j, k, l + 1) + & + (0.5_wp*(alpha_rho_R(i)* & + vel_R(3))*(1._wp/dz(l + 1)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + (0.5_wp*(alpha_rho_R(i)* & + vel_R(3))*(1._wp/dz(l)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dz(l))) end do + + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) + & + (0.5_wp*(alpha_R(i)* & + vel_R(3))*(1._wp/dz(l + 1)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l + 1)*vel_R(3)*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + (0.5_wp*(alpha_R(i)* & + vel_R(3))*(1._wp/dz(l)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dz(l))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(3)*(1._wp/dz(l))) + end do + end if + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) + & + (0.5_wp*(rho_R*(vel_R(3))**2.0 + & + pres_R + F_R)*(1._wp/dz(l + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) + & + (0.5_wp*rho_R*vel_R(1)*vel_R(3)*(1._wp/dz(l + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) + & + (0.5_wp*rho_R*vel_R(2)*vel_R(3)*(1._wp/dz(l + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) + & + (0.5_wp*(vel_R(3)*(E_R + & + pres_R + F_R))*(1._wp/dz(l + 1)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + (0.5_wp*(rho_R*(vel_R(3))**2.0 + & + pres_R + F_R)*(1._wp/dz(l)) + & + 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dz(l))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + (0.5_wp*rho_R*vel_R(1)*vel_R(3)*(1._wp/dz(l)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dz(l))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*rho_R*vel_R(2)*vel_R(3)*(1._wp/dz(l)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dz(l))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + (0.5_wp*(vel_R(3)*(E_R + & + pres_R + F_R))*(1._wp/dz(l)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dz(l))) + end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_igr_riemann_solver @@ -2611,49 +2611,49 @@ contains integer, intent(in) :: idir if (idir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(i)%sf(j, k, l) = 1._wp/dx(j)* & - (flux_vf(i)%sf(j - 1, k, l) & - - flux_vf(i)%sf(j, k, l)) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(i)%sf(j, k, l) = 1._wp/dx(j)* & + (flux_vf(i)%sf(j - 1, k, l) & + - flux_vf(i)%sf(j, k, l)) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() elseif (idir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & - (flux_vf(i)%sf(j, k - 1, l) & - - flux_vf(i)%sf(j, k, l)) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & + (flux_vf(i)%sf(j, k - 1, l) & + - flux_vf(i)%sf(j, k, l)) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() elseif (idir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dz(l)* & - (flux_vf(i)%sf(j, k, l - 1) & - - flux_vf(i)%sf(j, k, l)) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dz(l)* & + (flux_vf(i)%sf(j, k, l - 1) & + - flux_vf(i)%sf(j, k, l)) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_igr_flux_add diff --git a/src/simulation/m_mhd.fpp b/src/simulation/m_mhd.fpp index 12ba72809c..98149d4938 100644 --- a/src/simulation/m_mhd.fpp +++ b/src/simulation/m_mhd.fpp @@ -76,60 +76,60 @@ contains real(wp), dimension(3) :: v, B real(wp) :: divB, vdotB - #:call GPU_PARALLEL_LOOP(collapse=3, private='[v, B]') - do q = 0, p - do l = 0, n - do k = 0, m - - divB = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = -fd_number, fd_number - divB = divB + q_prim_vf(B_idx%beg)%sf(k + r, l, q)*fd_coeff_x_h(r, k) - end do + $:GPU_PARALLEL_LOOP(collapse=3, private='[k,l,q,v, B]') + do q = 0, p + do l = 0, n + do k = 0, m + + divB = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = -fd_number, fd_number + divB = divB + q_prim_vf(B_idx%beg)%sf(k + r, l, q)*fd_coeff_x_h(r, k) + end do + $:GPU_LOOP(parallelism='[seq]') + do r = -fd_number, fd_number + divB = divB + q_prim_vf(B_idx%beg + 1)%sf(k, l + r, q)*fd_coeff_y_h(r, l) + end do + if (p > 0) then $:GPU_LOOP(parallelism='[seq]') do r = -fd_number, fd_number - divB = divB + q_prim_vf(B_idx%beg + 1)%sf(k, l + r, q)*fd_coeff_y_h(r, l) + divB = divB + q_prim_vf(B_idx%beg + 2)%sf(k, l, q + r)*fd_coeff_z_h(r, q) end do - if (p > 0) then - $:GPU_LOOP(parallelism='[seq]') - do r = -fd_number, fd_number - divB = divB + q_prim_vf(B_idx%beg + 2)%sf(k, l, q + r)*fd_coeff_z_h(r, q) - end do - end if - - v(1) = q_prim_vf(momxb)%sf(k, l, q) - v(2) = q_prim_vf(momxb + 1)%sf(k, l, q) - v(3) = q_prim_vf(momxb + 2)%sf(k, l, q) - - B(1) = q_prim_vf(B_idx%beg)%sf(k, l, q) - B(2) = q_prim_vf(B_idx%beg + 1)%sf(k, l, q) - B(3) = q_prim_vf(B_idx%beg + 2)%sf(k, l, q) - - vdotB = sum(v*B) - - ! 1: rho -> unchanged - ! 2: vx -> - (divB) * Bx - ! 3: vy -> - (divB) * By - ! 4: vz -> - (divB) * Bz - ! 5: E -> - (divB) * (vdotB) - ! 6: Bx -> - (divB) * vx - ! 7: By -> - (divB) * vy - ! 8: Bz -> - (divB) * vz - - rhs_vf(momxb)%sf(k, l, q) = rhs_vf(momxb)%sf(k, l, q) - divB*B(1) - rhs_vf(momxb + 1)%sf(k, l, q) = rhs_vf(momxb + 1)%sf(k, l, q) - divB*B(2) - rhs_vf(momxb + 2)%sf(k, l, q) = rhs_vf(momxb + 2)%sf(k, l, q) - divB*B(3) - - rhs_vf(E_idx)%sf(k, l, q) = rhs_vf(E_idx)%sf(k, l, q) - divB*vdotB - - rhs_vf(B_idx%beg)%sf(k, l, q) = rhs_vf(B_idx%beg)%sf(k, l, q) - divB*v(1) - rhs_vf(B_idx%beg + 1)%sf(k, l, q) = rhs_vf(B_idx%beg + 1)%sf(k, l, q) - divB*v(2) - rhs_vf(B_idx%beg + 2)%sf(k, l, q) = rhs_vf(B_idx%beg + 2)%sf(k, l, q) - divB*v(3) + end if + + v(1) = q_prim_vf(momxb)%sf(k, l, q) + v(2) = q_prim_vf(momxb + 1)%sf(k, l, q) + v(3) = q_prim_vf(momxb + 2)%sf(k, l, q) + + B(1) = q_prim_vf(B_idx%beg)%sf(k, l, q) + B(2) = q_prim_vf(B_idx%beg + 1)%sf(k, l, q) + B(3) = q_prim_vf(B_idx%beg + 2)%sf(k, l, q) + + vdotB = sum(v*B) + + ! 1: rho -> unchanged + ! 2: vx -> - (divB) * Bx + ! 3: vy -> - (divB) * By + ! 4: vz -> - (divB) * Bz + ! 5: E -> - (divB) * (vdotB) + ! 6: Bx -> - (divB) * vx + ! 7: By -> - (divB) * vy + ! 8: Bz -> - (divB) * vz + + rhs_vf(momxb)%sf(k, l, q) = rhs_vf(momxb)%sf(k, l, q) - divB*B(1) + rhs_vf(momxb + 1)%sf(k, l, q) = rhs_vf(momxb + 1)%sf(k, l, q) - divB*B(2) + rhs_vf(momxb + 2)%sf(k, l, q) = rhs_vf(momxb + 2)%sf(k, l, q) - divB*B(3) + + rhs_vf(E_idx)%sf(k, l, q) = rhs_vf(E_idx)%sf(k, l, q) - divB*vdotB + + rhs_vf(B_idx%beg)%sf(k, l, q) = rhs_vf(B_idx%beg)%sf(k, l, q) - divB*v(1) + rhs_vf(B_idx%beg + 1)%sf(k, l, q) = rhs_vf(B_idx%beg + 1)%sf(k, l, q) - divB*v(2) + rhs_vf(B_idx%beg + 2)%sf(k, l, q) = rhs_vf(B_idx%beg + 2)%sf(k, l, q) - divB*v(3) - end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_compute_mhd_powell_rhs diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index d578eb7ff7..d0a2ffbfa4 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -313,40 +313,40 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - #:call GPU_PARALLEL_LOOP(collapse=3,private='[r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - r = (j + buff_size*(k + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j + pack_offset, k, l) - end do + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + r = (j + buff_size*(k + (n + 1)*l)) + ib_buff_send(r) = ib_markers%sf(j + pack_offset, k, l) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() #:elif mpi_dir == 2 - #:call GPU_PARALLEL_LOOP(collapse=3,private='[r]') - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - r = ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - ib_buff_send(r) = ib_markers%sf(j, k + pack_offset, l) - end do + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + r = ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + ib_buff_send(r) = ib_markers%sf(j, k + pack_offset, l) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() #:else - #:call GPU_PARALLEL_LOOP(collapse=3,private='[r]') - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l + pack_offset) - end do + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + ib_buff_send(r) = ib_markers%sf(j, k, l + pack_offset) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() #:endif end if #:endfor @@ -390,42 +390,42 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - #:call GPU_PARALLEL_LOOP(collapse=3,private='[r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - r = (j + buff_size*((k + 1) + (n + 1)*l)) - ib_markers%sf(j + unpack_offset, k, l) = ib_buff_recv(r) - end do + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + r = (j + buff_size*((k + 1) + (n + 1)*l)) + ib_markers%sf(j + unpack_offset, k, l) = ib_buff_recv(r) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() #:elif mpi_dir == 2 - #:call GPU_PARALLEL_LOOP(collapse=3,private='[r]') - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - r = ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - ib_markers%sf(j, k + unpack_offset, l) = ib_buff_recv(r) - end do + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + r = ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + ib_markers%sf(j, k + unpack_offset, l) = ib_buff_recv(r) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() #:else ! Unpacking buffer from bc_z%beg - #:call GPU_PARALLEL_LOOP(collapse=3,private='[r]') - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - ib_markers%sf(j, k, l + unpack_offset) = ib_buff_recv(r) - end do + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + ib_markers%sf(j, k, l + unpack_offset) = ib_buff_recv(r) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() #:endif end if #:endfor diff --git a/src/simulation/m_muscl.fpp b/src/simulation/m_muscl.fpp index 335ef7e03f..2c2b7a0149 100644 --- a/src/simulation/m_muscl.fpp +++ b/src/simulation/m_muscl.fpp @@ -118,101 +118,101 @@ contains if (muscl_order == 1) then if (muscl_dir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, ubound(v_vf, 1) - do l = is3_muscl%beg, is3_muscl%end - do k = is2_muscl%beg, is2_muscl%end - do j = is1_muscl%beg, is1_muscl%end - vL_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - vR_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, ubound(v_vf, 1) + do l = is3_muscl%beg, is3_muscl%end + do k = is2_muscl%beg, is2_muscl%end + do j = is1_muscl%beg, is1_muscl%end + vL_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l) + vR_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l) end do end do end do - #:endcall + end do + $:END_OMP_PARALLEL_LOOP() else if (muscl_dir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, ubound(v_vf, 1) - do l = is3_muscl%beg, is3_muscl%end - do k = is2_muscl%beg, is2_muscl%end - do j = is1_muscl%beg, is1_muscl%end - vL_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - vR_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, ubound(v_vf, 1) + do l = is3_muscl%beg, is3_muscl%end + do k = is2_muscl%beg, is2_muscl%end + do j = is1_muscl%beg, is1_muscl%end + vL_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + vR_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) end do end do end do - #:endcall + end do + $:END_OMP_PARALLEL_LOOP() else if (muscl_dir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, ubound(v_vf, 1) - do l = is3_muscl%beg, is3_muscl%end - do k = is2_muscl%beg, is2_muscl%end - do j = is1_muscl%beg, is1_muscl%end - vL_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - vR_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, ubound(v_vf, 1) + do l = is3_muscl%beg, is3_muscl%end + do k = is2_muscl%beg, is2_muscl%end + do j = is1_muscl%beg, is1_muscl%end + vL_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + vR_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) end do end do end do - #:endcall + end do + $:END_OMP_PARALLEL_LOOP() end if else if (muscl_order == 2) then ! MUSCL Reconstruction #:for MUSCL_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (muscl_dir == ${MUSCL_DIR}$) then - #:call GPU_PARALLEL_LOOP(collapse=4,private='[slopeL,slopeR,slope]') - do l = is3_muscl%beg, is3_muscl%end - do k = is2_muscl%beg, is2_muscl%end - do j = is1_muscl%beg, is1_muscl%end - do i = 1, v_size - - slopeL = v_rs_ws_${XYZ}$ (j + 1, k, l, i) - & - v_rs_ws_${XYZ}$ (j, k, l, i) - slopeR = v_rs_ws_${XYZ}$ (j, k, l, i) - & - v_rs_ws_${XYZ}$ (j - 1, k, l, i) - slope = 0._wp - - if (muscl_lim == 1) then ! minmod - if (slopeL*slopeR > 1e-9_wp) then - slope = min(abs(slopeL), abs(slopeR)) - end if - if (slopeL < 0._wp) slope = -slope - elseif (muscl_lim == 2) then ! MC - if (slopeL*slopeR > 1e-9_wp) then - slope = min(2._wp*abs(slopeL), 2._wp*abs(slopeR)) - slope = min(slope, 5e-1_wp*(abs(slopeL) + abs(slopeR))) - end if - if (slopeL < 0._wp) slope = -slope - elseif (muscl_lim == 3) then ! Van Albada - if (abs(slopeL) > 1e-6_wp .and. abs(slopeR) > 1e-6_wp .and. & - abs(slopeL + slopeR) > 1e-6_wp .and. slopeL*slopeR > 1e-6_wp) then - slope = ((slopeL + slopeR)*slopeL*slopeR)/(slopeL**2._wp + slopeR**2._wp) - end if - elseif (muscl_lim == 4) then ! Van Leer - if (abs(slopeL + slopeR) > 1.e-6_wp .and. slopeL*slopeR > 1.e-6_wp) then - slope = 2._wp*slopeL*slopeR/(slopeL + slopeR) - end if - elseif (muscl_lim == 5) then ! SUPERBEE - if (slopeL*slopeR > 1e-6_wp) then - slope = -1._wp*min(-min(2._wp*abs(slopeL), abs(slopeR)), -min(abs(slopeL), 2._wp*abs(slopeR))) - end if + $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,slopeL,slopeR,slope]') + do l = is3_muscl%beg, is3_muscl%end + do k = is2_muscl%beg, is2_muscl%end + do j = is1_muscl%beg, is1_muscl%end + do i = 1, v_size + + slopeL = v_rs_ws_${XYZ}$ (j + 1, k, l, i) - & + v_rs_ws_${XYZ}$ (j, k, l, i) + slopeR = v_rs_ws_${XYZ}$ (j, k, l, i) - & + v_rs_ws_${XYZ}$ (j - 1, k, l, i) + slope = 0._wp + + if (muscl_lim == 1) then ! minmod + if (slopeL*slopeR > 1e-9_wp) then + slope = min(abs(slopeL), abs(slopeR)) + end if + if (slopeL < 0._wp) slope = -slope + elseif (muscl_lim == 2) then ! MC + if (slopeL*slopeR > 1e-9_wp) then + slope = min(2._wp*abs(slopeL), 2._wp*abs(slopeR)) + slope = min(slope, 5e-1_wp*(abs(slopeL) + abs(slopeR))) + end if + if (slopeL < 0._wp) slope = -slope + elseif (muscl_lim == 3) then ! Van Albada + if (abs(slopeL) > 1e-6_wp .and. abs(slopeR) > 1e-6_wp .and. & + abs(slopeL + slopeR) > 1e-6_wp .and. slopeL*slopeR > 1e-6_wp) then + slope = ((slopeL + slopeR)*slopeL*slopeR)/(slopeL**2._wp + slopeR**2._wp) + end if + elseif (muscl_lim == 4) then ! Van Leer + if (abs(slopeL + slopeR) > 1.e-6_wp .and. slopeL*slopeR > 1.e-6_wp) then + slope = 2._wp*slopeL*slopeR/(slopeL + slopeR) + end if + elseif (muscl_lim == 5) then ! SUPERBEE + if (slopeL*slopeR > 1e-6_wp) then + slope = -1._wp*min(-min(2._wp*abs(slopeL), abs(slopeR)), -min(abs(slopeL), 2._wp*abs(slopeR))) end if + end if - ! reconstruct from left side - vL_rs_vf_${XYZ}$ (j, k, l, i) = & - v_rs_ws_${XYZ}$ (j, k, l, i) - (5.e-1_wp*slope) + ! reconstruct from left side + vL_rs_vf_${XYZ}$ (j, k, l, i) = & + v_rs_ws_${XYZ}$ (j, k, l, i) - (5.e-1_wp*slope) - ! reconstruct from the right side - vR_rs_vf_${XYZ}$ (j, k, l, i) = & - v_rs_ws_${XYZ}$ (j, k, l, i) + (5.e-1_wp*slope) + ! reconstruct from the right side + vR_rs_vf_${XYZ}$ (j, k, l, i) = & + v_rs_ws_${XYZ}$ (j, k, l, i) + (5.e-1_wp*slope) - end do end do end do end do - #:endcall + end do + $:END_OMP_PARALLEL_LOOP() end if #:endfor end if @@ -243,60 +243,60 @@ contains #:for MUSCL_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (muscl_dir == ${MUSCL_DIR}$) then - #:call GPU_PARALLEL_LOOP(collapse=3,private='[aCL,aC,aCR,aTHINC,moncon,sign,qmin,qmax]') - do l = is3_muscl%beg, is3_muscl%end - do k = is2_muscl%beg, is2_muscl%end - do j = is1_muscl%beg, is1_muscl%end + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,aCL,aC,aCR,aTHINC,moncon,sign,qmin,qmax]') + do l = is3_muscl%beg, is3_muscl%end + do k = is2_muscl%beg, is2_muscl%end + do j = is1_muscl%beg, is1_muscl%end - aCL = v_rs_ws_${XYZ}$ (j - 1, k, l, advxb) - aC = v_rs_ws_${XYZ}$ (j, k, l, advxb) - aCR = v_rs_ws_${XYZ}$ (j + 1, k, l, advxb) + aCL = v_rs_ws_${XYZ}$ (j - 1, k, l, advxb) + aC = v_rs_ws_${XYZ}$ (j, k, l, advxb) + aCR = v_rs_ws_${XYZ}$ (j + 1, k, l, advxb) - moncon = (aCR - aC)*(aC - aCL) + moncon = (aCR - aC)*(aC - aCL) - if (aC >= ic_eps .and. aC <= 1._wp - ic_eps .and. moncon > moncon_cutoff) then ! Interface cell - - if (aCR - aCL > 0._wp) then - sign = 1._wp - else - sign = -1._wp - end if - - qmin = min(aCR, aCL) - qmax = max(aCR, aCL) - qmin - - C = (aC - qmin + sgm_eps)/(qmax + sgm_eps) - B = exp(sign*ic_beta*(2._wp*C - 1._wp)) - A = (B/cosh(ic_beta) - 1._wp)/tanh(ic_beta) - - ! Left reconstruction - aTHINC = qmin + 5e-1_wp*qmax*(1._wp + sign*A) - if (aTHINC < ic_eps) aTHINC = ic_eps - if (aTHINC > 1 - ic_eps) aTHINC = 1 - ic_eps - vL_rs_vf_${XYZ}$ (j, k, l, contxb) = vL_rs_vf_${XYZ}$ (j, k, l, contxb)/ & - vL_rs_vf_${XYZ}$ (j, k, l, advxb)*aTHINC - vL_rs_vf_${XYZ}$ (j, k, l, contxe) = vL_rs_vf_${XYZ}$ (j, k, l, contxe)/ & - (1._wp - vL_rs_vf_${XYZ}$ (j, k, l, advxb))*(1._wp - aTHINC) - vL_rs_vf_${XYZ}$ (j, k, l, advxb) = aTHINC - vL_rs_vf_${XYZ}$ (j, k, l, advxe) = 1 - aTHINC - - ! Right reconstruction - aTHINC = qmin + 5e-1_wp*qmax*(1._wp + sign*(tanh(ic_beta) + A)/(1._wp + A*tanh(ic_beta))) - if (aTHINC < ic_eps) aTHINC = ic_eps - if (aTHINC > 1 - ic_eps) aTHINC = 1 - ic_eps - vR_rs_vf_${XYZ}$ (j, k, l, contxb) = vL_rs_vf_${XYZ}$ (j, k, l, contxb)/ & - vL_rs_vf_${XYZ}$ (j, k, l, advxb)*aTHINC - vR_rs_vf_${XYZ}$ (j, k, l, contxe) = vL_rs_vf_${XYZ}$ (j, k, l, contxe)/ & - (1._wp - vL_rs_vf_${XYZ}$ (j, k, l, advxb))*(1._wp - aTHINC) - vR_rs_vf_${XYZ}$ (j, k, l, advxb) = aTHINC - vR_rs_vf_${XYZ}$ (j, k, l, advxe) = 1 - aTHINC + if (aC >= ic_eps .and. aC <= 1._wp - ic_eps .and. moncon > moncon_cutoff) then ! Interface cell + if (aCR - aCL > 0._wp) then + sign = 1._wp + else + sign = -1._wp end if - end do + qmin = min(aCR, aCL) + qmax = max(aCR, aCL) - qmin + + C = (aC - qmin + sgm_eps)/(qmax + sgm_eps) + B = exp(sign*ic_beta*(2._wp*C - 1._wp)) + A = (B/cosh(ic_beta) - 1._wp)/tanh(ic_beta) + + ! Left reconstruction + aTHINC = qmin + 5e-1_wp*qmax*(1._wp + sign*A) + if (aTHINC < ic_eps) aTHINC = ic_eps + if (aTHINC > 1 - ic_eps) aTHINC = 1 - ic_eps + vL_rs_vf_${XYZ}$ (j, k, l, contxb) = vL_rs_vf_${XYZ}$ (j, k, l, contxb)/ & + vL_rs_vf_${XYZ}$ (j, k, l, advxb)*aTHINC + vL_rs_vf_${XYZ}$ (j, k, l, contxe) = vL_rs_vf_${XYZ}$ (j, k, l, contxe)/ & + (1._wp - vL_rs_vf_${XYZ}$ (j, k, l, advxb))*(1._wp - aTHINC) + vL_rs_vf_${XYZ}$ (j, k, l, advxb) = aTHINC + vL_rs_vf_${XYZ}$ (j, k, l, advxe) = 1 - aTHINC + + ! Right reconstruction + aTHINC = qmin + 5e-1_wp*qmax*(1._wp + sign*(tanh(ic_beta) + A)/(1._wp + A*tanh(ic_beta))) + if (aTHINC < ic_eps) aTHINC = ic_eps + if (aTHINC > 1 - ic_eps) aTHINC = 1 - ic_eps + vR_rs_vf_${XYZ}$ (j, k, l, contxb) = vL_rs_vf_${XYZ}$ (j, k, l, contxb)/ & + vL_rs_vf_${XYZ}$ (j, k, l, advxb)*aTHINC + vR_rs_vf_${XYZ}$ (j, k, l, contxe) = vL_rs_vf_${XYZ}$ (j, k, l, contxe)/ & + (1._wp - vL_rs_vf_${XYZ}$ (j, k, l, advxb))*(1._wp - aTHINC) + vR_rs_vf_${XYZ}$ (j, k, l, advxb) = aTHINC + vR_rs_vf_${XYZ}$ (j, k, l, advxe) = 1 - aTHINC + + end if + end do end do - #:endcall + end do + $:END_OMP_PARALLEL_LOOP() end if #:endfor @@ -318,50 +318,50 @@ contains $:GPU_UPDATE(device='[v_size]') if (muscl_dir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do j = 1, v_size - do q = is3_muscl%beg, is3_muscl%end - do l = is2_muscl%beg, is2_muscl%end - do k = is1_muscl%beg - muscl_polyn, is1_muscl%end + muscl_polyn - v_rs_ws_x(k, l, q, j) = v_vf(j)%sf(k, l, q) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=4) + do j = 1, v_size + do q = is3_muscl%beg, is3_muscl%end + do l = is2_muscl%beg, is2_muscl%end + do k = is1_muscl%beg - muscl_polyn, is1_muscl%end + muscl_polyn + v_rs_ws_x(k, l, q, j) = v_vf(j)%sf(k, l, q) end do end do end do - #:endcall + end do + $:END_OMP_PARALLEL_LOOP() end if ! Reshaping/Projecting onto Characteristic Fields in y-direction if (n == 0) return if (muscl_dir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do j = 1, v_size - do q = is3_muscl%beg, is3_muscl%end - do l = is2_muscl%beg, is2_muscl%end - do k = is1_muscl%beg - muscl_polyn, is1_muscl%end + muscl_polyn - v_rs_ws_y(k, l, q, j) = v_vf(j)%sf(l, k, q) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=4) + do j = 1, v_size + do q = is3_muscl%beg, is3_muscl%end + do l = is2_muscl%beg, is2_muscl%end + do k = is1_muscl%beg - muscl_polyn, is1_muscl%end + muscl_polyn + v_rs_ws_y(k, l, q, j) = v_vf(j)%sf(l, k, q) end do end do end do - #:endcall + end do + $:END_OMP_PARALLEL_LOOP() end if ! Reshaping/Projecting onto Characteristic Fields in z-direction if (p == 0) return if (muscl_dir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do j = 1, v_size - do q = is3_muscl%beg, is3_muscl%end - do l = is2_muscl%beg, is2_muscl%end - do k = is1_muscl%beg - muscl_polyn, is1_muscl%end + muscl_polyn - v_rs_ws_z(k, l, q, j) = v_vf(j)%sf(q, l, k) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=4) + do j = 1, v_size + do q = is3_muscl%beg, is3_muscl%end + do l = is2_muscl%beg, is2_muscl%end + do k = is1_muscl%beg - muscl_polyn, is1_muscl%end + muscl_polyn + v_rs_ws_z(k, l, q, j) = v_vf(j)%sf(q, l, k) end do end do end do - #:endcall + end do + $:END_OMP_PARALLEL_LOOP() end if end subroutine s_initialize_muscl diff --git a/src/simulation/m_pressure_relaxation.fpp b/src/simulation/m_pressure_relaxation.fpp index 407c01ff10..4c6618dd95 100644 --- a/src/simulation/m_pressure_relaxation.fpp +++ b/src/simulation/m_pressure_relaxation.fpp @@ -70,15 +70,15 @@ contains type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf integer :: j, k, l - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - call s_relax_cell_pressure(q_cons_vf, j, k, l) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + call s_relax_cell_pressure(q_cons_vf, j, k, l) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_pressure_relaxation_procedure diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 85dd51b166..67d29cc2c1 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -433,132 +433,132 @@ contains end select if (.not. polytropic) then - #:call GPU_PARALLEL_LOOP(collapse=5,private='[nb_q,nR,nR2,R,R2,nb_dot,nR_dot,nR2_dot,var,AX]') - do i = 1, nb - do q = 1, nnode - do l = 0, p - do k = 0, n - do j = 0, m - nb_q = q_cons_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) - nR = q_cons_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) - nR2 = q_cons_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - R = q_prim_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) - R2 = q_prim_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - var = max(R2 - R**2._wp, verysmall) - if (q <= 2) then - AX = R - sqrt(var) + $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,nb_q,nR,nR2,R,R2,nb_dot,nR_dot,nR2_dot,var,AX]') + do i = 1, nb + do q = 1, nnode + do l = 0, p + do k = 0, n + do j = 0, m + nb_q = q_cons_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) + nR = q_cons_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) + nR2 = q_cons_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) + R = q_prim_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) + R2 = q_prim_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) + var = max(R2 - R**2._wp, verysmall) + if (q <= 2) then + AX = R - sqrt(var) + else + AX = R + sqrt(var) + end if + select case (idir) + case (1) + nb_dot = flux_n_vf(bubxb + (i - 1)*nmom)%sf(j - 1, k, l) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) + nR_dot = flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j - 1, k, l) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) + nR2_dot = flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j - 1, k, l) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dx(j)*AX*nb_q**2)* & + (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) + case (2) + nb_dot = flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k - 1, l) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) + nR_dot = flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k - 1, l) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) + nR2_dot = flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k - 1, l) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dy(k)*AX*nb_q**2)* & + (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) + case (3) + if (is_axisym) then + nb_dot = q_prim_vf(contxe + idir)%sf(j, k, l)*(flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l)) + nR_dot = q_prim_vf(contxe + idir)%sf(j, k, l)*(flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l)) + nR2_dot = q_prim_vf(contxe + idir)%sf(j, k, l)*(flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l)) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2)* & + (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) else - AX = R + sqrt(var) + nb_dot = flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) + nR_dot = flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) + nR2_dot = flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*AX*nb_q**2)* & + (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) end if + end select + if (q <= 2) then select case (idir) case (1) - nb_dot = flux_n_vf(bubxb + (i - 1)*nmom)%sf(j - 1, k, l) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) - nR_dot = flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j - 1, k, l) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) - nR2_dot = flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j - 1, k, l) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dx(j)*AX*nb_q**2)* & - (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & + (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & + (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) case (2) - nb_dot = flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k - 1, l) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) - nR_dot = flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k - 1, l) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) - nR2_dot = flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k - 1, l) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dy(k)*AX*nb_q**2)* & - (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) case (3) if (is_axisym) then - nb_dot = q_prim_vf(contxe + idir)%sf(j, k, l)*(flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l)) - nR_dot = q_prim_vf(contxe + idir)%sf(j, k, l)*(flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l)) - nR2_dot = q_prim_vf(contxe + idir)%sf(j, k, l)*(flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2)* & - (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) - else - nb_dot = flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) - nR_dot = flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) - nR2_dot = flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*AX*nb_q**2)* & - (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) - end if - end select - if (q <= 2) then - select case (idir) - case (1) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - case (2) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + else + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - case (3) - if (is_axisym) then - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & - (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & - (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - else - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & - (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & - (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - end if - end select - else - select case (idir) - case (1) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & + end if + end select + else + select case (idir) + case (1) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & + (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & + (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) + case (2) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) + case (3) + if (is_axisym) then + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - case (2) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + else + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - case (3) - if (is_axisym) then - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & - (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & - (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - else - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & - (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & - (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - end if - end select - end if - end do + end if + end select + end if end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if ! The following block is not repeated and is left as is if (idir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do q = 0, n - do i = 0, m - rhs_vf(alf_idx)%sf(i, q, l) = rhs_vf(alf_idx)%sf(i, q, l) + mom_sp(2)%sf(i, q, l) - j = bubxb - $:GPU_LOOP(parallelism='[seq]') - do k = 1, nb - rhs_vf(j)%sf(i, q, l) = rhs_vf(j)%sf(i, q, l) + mom_3d(0, 0, k)%sf(i, q, l) - rhs_vf(j + 1)%sf(i, q, l) = rhs_vf(j + 1)%sf(i, q, l) + mom_3d(1, 0, k)%sf(i, q, l) - rhs_vf(j + 2)%sf(i, q, l) = rhs_vf(j + 2)%sf(i, q, l) + mom_3d(0, 1, k)%sf(i, q, l) - rhs_vf(j + 3)%sf(i, q, l) = rhs_vf(j + 3)%sf(i, q, l) + mom_3d(2, 0, k)%sf(i, q, l) - rhs_vf(j + 4)%sf(i, q, l) = rhs_vf(j + 4)%sf(i, q, l) + mom_3d(1, 1, k)%sf(i, q, l) - rhs_vf(j + 5)%sf(i, q, l) = rhs_vf(j + 5)%sf(i, q, l) + mom_3d(0, 2, k)%sf(i, q, l) - j = j + 6 - end do + $:GPU_PARALLEL_LOOP(private='[i,l,q]', collapse=3) + do l = 0, p + do q = 0, n + do i = 0, m + rhs_vf(alf_idx)%sf(i, q, l) = rhs_vf(alf_idx)%sf(i, q, l) + mom_sp(2)%sf(i, q, l) + j = bubxb + $:GPU_LOOP(parallelism='[seq]') + do k = 1, nb + rhs_vf(j)%sf(i, q, l) = rhs_vf(j)%sf(i, q, l) + mom_3d(0, 0, k)%sf(i, q, l) + rhs_vf(j + 1)%sf(i, q, l) = rhs_vf(j + 1)%sf(i, q, l) + mom_3d(1, 0, k)%sf(i, q, l) + rhs_vf(j + 2)%sf(i, q, l) = rhs_vf(j + 2)%sf(i, q, l) + mom_3d(0, 1, k)%sf(i, q, l) + rhs_vf(j + 3)%sf(i, q, l) = rhs_vf(j + 3)%sf(i, q, l) + mom_3d(2, 0, k)%sf(i, q, l) + rhs_vf(j + 4)%sf(i, q, l) = rhs_vf(j + 4)%sf(i, q, l) + mom_3d(1, 1, k)%sf(i, q, l) + rhs_vf(j + 5)%sf(i, q, l) = rhs_vf(j + 5)%sf(i, q, l) + mom_3d(0, 2, k)%sf(i, q, l) + j = j + 6 end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_compute_qbmm_rhs @@ -714,144 +714,144 @@ contains is1_qbmm = ix; is2_qbmm = iy; is3_qbmm = iz $:GPU_UPDATE(device='[is1_qbmm,is2_qbmm,is3_qbmm]') - #:call GPU_PARALLEL_LOOP(collapse=3, private='[moms, msum, wght, abscX, abscY, wght_pb, wght_mv, wght_ht, coeff, ht, r, q, n_tait, B_tait, pres, rho, nbub, c, alf, momsum, drdt, drdt2, chi_vw, x_vw, rho_mw, k_mw, T_bar, grad_T]') - do id3 = is3_qbmm%beg, is3_qbmm%end - do id2 = is2_qbmm%beg, is2_qbmm%end - do id1 = is1_qbmm%beg, is1_qbmm%end + $:GPU_PARALLEL_LOOP(collapse=3, private='[id1,id2,id3,moms, msum, wght, abscX, abscY, wght_pb, wght_mv, wght_ht, coeff, ht, r, q, n_tait, B_tait, pres, rho, nbub, c, alf, momsum, drdt, drdt2, chi_vw, x_vw, rho_mw, k_mw, T_bar, grad_T]') + do id3 = is3_qbmm%beg, is3_qbmm%end + do id2 = is2_qbmm%beg, is2_qbmm%end + do id1 = is1_qbmm%beg, is1_qbmm%end - alf = q_prim_vf(alf_idx)%sf(id1, id2, id3) - pres = q_prim_vf(E_idx)%sf(id1, id2, id3) - rho = q_prim_vf(contxb)%sf(id1, id2, id3) + alf = q_prim_vf(alf_idx)%sf(id1, id2, id3) + pres = q_prim_vf(E_idx)%sf(id1, id2, id3) + rho = q_prim_vf(contxb)%sf(id1, id2, id3) - if (bubble_model == 2) then - n_tait = 1._wp/gammas(1) + 1._wp - B_tait = pi_infs(1)*(n_tait - 1)/n_tait - c = n_tait*(pres + B_tait)*(1._wp - alf)/(rho) - c = merge(sqrt(c), sgm_eps, c > 0._wp) - end if + if (bubble_model == 2) then + n_tait = 1._wp/gammas(1) + 1._wp + B_tait = pi_infs(1)*(n_tait - 1)/n_tait + c = n_tait*(pres + B_tait)*(1._wp - alf)/(rho) + c = merge(sqrt(c), sgm_eps, c > 0._wp) + end if - call s_coeff_selector(pres, rho, c, coeff, polytropic) + call s_coeff_selector(pres, rho, c, coeff, polytropic) - if (alf > small_alf) then - nbub = q_cons_vf(bubxb)%sf(id1, id2, id3) + if (alf > small_alf) then + nbub = q_cons_vf(bubxb)%sf(id1, id2, id3) + $:GPU_LOOP(parallelism='[seq]') + do q = 1, nb + ! Gather moments for this bubble bin $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb - ! Gather moments for this bubble bin + do r = 2, nmom + moms(r) = q_prim_vf(bubmoms(q, r))%sf(id1, id2, id3) + end do + moms(1) = 1._wp + call s_chyqmom(moms, wght(:, q), abscX(:, q), abscY(:, q)) + + if (polytropic) then $:GPU_LOOP(parallelism='[seq]') - do r = 2, nmom - moms(r) = q_prim_vf(bubmoms(q, r))%sf(id1, id2, id3) + do j = 1, nnode + wght_pb(j, q) = wght(j, q)*(pb0(q) - pv) end do - moms(1) = 1._wp - call s_chyqmom(moms, wght(:, q), abscX(:, q), abscY(:, q)) - - if (polytropic) then - $:GPU_LOOP(parallelism='[seq]') - do j = 1, nnode - wght_pb(j, q) = wght(j, q)*(pb0(q) - pv) - end do - else - $:GPU_LOOP(parallelism='[seq]') - do j = 1, nnode - chi_vw = 1._wp/(1._wp + R_v/R_n*(pb(id1, id2, id3, j, q)/pv - 1._wp)) - x_vw = M_n*chi_vw/(M_v + (M_n - M_v)*chi_vw) - k_mw = x_vw*k_v(q)/(x_vw + (1._wp - x_vw)*phi_vn) + (1._wp - x_vw)*k_n(q)/(x_vw*phi_nv + 1._wp - x_vw) - rho_mw = pv/(chi_vw*R_v*Tw) - rhs_mv(id1, id2, id3, j, q) = -Re_trans_c(q)*((mv(id1, id2, id3, j, q)/(mv(id1, id2, id3, j, q) + mass_n0(q))) - chi_vw) - rhs_mv(id1, id2, id3, j, q) = rho_mw*rhs_mv(id1, id2, id3, j, q)/Pe_c/(1._wp - chi_vw)/abscX(j, q) - T_bar = Tw*(pb(id1, id2, id3, j, q)/pb0(q))*(abscX(j, q)/R0(q))**3*(mass_n0(q) + mass_v0(q))/(mass_n0(q) + mv(id1, id2, id3, j, q)) - grad_T = -Re_trans_T(q)*(T_bar - Tw) - ht(j, q) = pb0(q)*k_mw*grad_T/Pe_T(q)/abscX(j, q) - wght_pb(j, q) = wght(j, q)*(pb(id1, id2, id3, j, q)) - wght_mv(j, q) = wght(j, q)*(rhs_mv(id1, id2, id3, j, q)) - wght_ht(j, q) = wght(j, q)*ht(j, q) - end do - end if - - ! Compute change in moments due to bubble dynamics - r = 1 + else $:GPU_LOOP(parallelism='[seq]') - do i2 = 0, 2 - $:GPU_LOOP(parallelism='[seq]') - do i1 = 0, 2 - if ((i1 + i2) <= 2) then - momsum = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do j = 1, nterms - select case (bubble_model) - case (3) - if (j == 3) then - momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_pb(:, q), momrhs(:, i1, i2, j, q)) - else - momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght(:, q), momrhs(:, i1, i2, j, q)) - end if - case (2) - if ((j >= 7 .and. j <= 9) .or. (j >= 22 .and. j <= 23) .or. (j >= 10 .and. j <= 11) .or. (j == 26)) then - momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_pb(:, q), momrhs(:, i1, i2, j, q)) - else if ((j >= 27 .and. j <= 29) .and. (.not. polytropic)) then - momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_mv(:, q), momrhs(:, i1, i2, j, q)) - else if ((j >= 30 .and. j <= 32) .and. (.not. polytropic)) then - momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_ht(:, q), momrhs(:, i1, i2, j, q)) - else - momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght(:, q), momrhs(:, i1, i2, j, q)) - end if - end select - end do - moms3d(i1, i2, q)%sf(id1, id2, id3) = nbub*momsum - msum(r) = momsum - r = r + 1 - end if - end do + do j = 1, nnode + chi_vw = 1._wp/(1._wp + R_v/R_n*(pb(id1, id2, id3, j, q)/pv - 1._wp)) + x_vw = M_n*chi_vw/(M_v + (M_n - M_v)*chi_vw) + k_mw = x_vw*k_v(q)/(x_vw + (1._wp - x_vw)*phi_vn) + (1._wp - x_vw)*k_n(q)/(x_vw*phi_nv + 1._wp - x_vw) + rho_mw = pv/(chi_vw*R_v*Tw) + rhs_mv(id1, id2, id3, j, q) = -Re_trans_c(q)*((mv(id1, id2, id3, j, q)/(mv(id1, id2, id3, j, q) + mass_n0(q))) - chi_vw) + rhs_mv(id1, id2, id3, j, q) = rho_mw*rhs_mv(id1, id2, id3, j, q)/Pe_c/(1._wp - chi_vw)/abscX(j, q) + T_bar = Tw*(pb(id1, id2, id3, j, q)/pb0(q))*(abscX(j, q)/R0(q))**3*(mass_n0(q) + mass_v0(q))/(mass_n0(q) + mv(id1, id2, id3, j, q)) + grad_T = -Re_trans_T(q)*(T_bar - Tw) + ht(j, q) = pb0(q)*k_mw*grad_T/Pe_T(q)/abscX(j, q) + wght_pb(j, q) = wght(j, q)*(pb(id1, id2, id3, j, q)) + wght_mv(j, q) = wght(j, q)*(rhs_mv(id1, id2, id3, j, q)) + wght_ht(j, q) = wght(j, q)*ht(j, q) end do + end if - ! Compute change in pb and mv for non-polytropic model - if (.not. polytropic) then - $:GPU_LOOP(parallelism='[seq]') - do j = 1, nnode - drdt = msum(2) - drdt2 = merge(-1._wp, 1._wp, j == 1 .or. j == 2)/(2._wp*sqrt(merge(moms(4) - moms(2)**2._wp, verysmall, moms(4) - moms(2)**2._wp > 0._wp))) - drdt2 = drdt2*(msum(3) - 2._wp*moms(2)*msum(2)) - drdt = drdt + drdt2 - rhs_pb(id1, id2, id3, j, q) = (-3._wp*gam*drdt/abscX(j, q))*(pb(id1, id2, id3, j, q)) - rhs_pb(id1, id2, id3, j, q) = rhs_pb(id1, id2, id3, j, q) + (3._wp*gam/abscX(j, q))*rhs_mv(id1, id2, id3, j, q)*R_v*Tw - rhs_pb(id1, id2, id3, j, q) = rhs_pb(id1, id2, id3, j, q) + (3._wp*gam/abscX(j, q))*ht(j, q) - rhs_mv(id1, id2, id3, j, q) = rhs_mv(id1, id2, id3, j, q)*(4._wp*pi*abscX(j, q)**2._wp) - end do - end if + ! Compute change in moments due to bubble dynamics + r = 1 + $:GPU_LOOP(parallelism='[seq]') + do i2 = 0, 2 + $:GPU_LOOP(parallelism='[seq]') + do i1 = 0, 2 + if ((i1 + i2) <= 2) then + momsum = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do j = 1, nterms + select case (bubble_model) + case (3) + if (j == 3) then + momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_pb(:, q), momrhs(:, i1, i2, j, q)) + else + momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght(:, q), momrhs(:, i1, i2, j, q)) + end if + case (2) + if ((j >= 7 .and. j <= 9) .or. (j >= 22 .and. j <= 23) .or. (j >= 10 .and. j <= 11) .or. (j == 26)) then + momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_pb(:, q), momrhs(:, i1, i2, j, q)) + else if ((j >= 27 .and. j <= 29) .and. (.not. polytropic)) then + momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_mv(:, q), momrhs(:, i1, i2, j, q)) + else if ((j >= 30 .and. j <= 32) .and. (.not. polytropic)) then + momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_ht(:, q), momrhs(:, i1, i2, j, q)) + else + momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght(:, q), momrhs(:, i1, i2, j, q)) + end if + end select + end do + moms3d(i1, i2, q)%sf(id1, id2, id3) = nbub*momsum + msum(r) = momsum + r = r + 1 + end if + end do end do - ! Compute special high-order moments - momsp(1)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght, 3._wp, 0._wp, 0._wp) - momsp(2)%sf(id1, id2, id3) = 4._wp*pi*nbub*f_quad(abscX, abscY, wght, 2._wp, 1._wp, 0._wp) - momsp(3)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght, 3._wp, 2._wp, 0._wp) - if (abs(gam - 1._wp) <= 1.e-4_wp) then - momsp(4)%sf(id1, id2, id3) = 1._wp - else - if (polytropic) then - momsp(4)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght_pb, 3._wp*(1._wp - gam), 0._wp, 3._wp*gam) + pv*f_quad(abscX, abscY, wght, 3._wp, 0._wp, 0._wp) - 4._wp*Re_inv*f_quad(abscX, abscY, wght, 2._wp, 1._wp, 0._wp) - (2._wp/Web)*f_quad(abscX, abscY, wght, 2._wp, 0._wp, 0._wp) - else - momsp(4)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght_pb, 3._wp, 0._wp, 0._wp) - 4._wp*Re_inv*f_quad(abscX, abscY, wght, 2._wp, 1._wp, 0._wp) - (2._wp/Web)*f_quad(abscX, abscY, wght, 2._wp, 0._wp, 0._wp) - end if + ! Compute change in pb and mv for non-polytropic model + if (.not. polytropic) then + $:GPU_LOOP(parallelism='[seq]') + do j = 1, nnode + drdt = msum(2) + drdt2 = merge(-1._wp, 1._wp, j == 1 .or. j == 2)/(2._wp*sqrt(merge(moms(4) - moms(2)**2._wp, verysmall, moms(4) - moms(2)**2._wp > 0._wp))) + drdt2 = drdt2*(msum(3) - 2._wp*moms(2)*msum(2)) + drdt = drdt + drdt2 + rhs_pb(id1, id2, id3, j, q) = (-3._wp*gam*drdt/abscX(j, q))*(pb(id1, id2, id3, j, q)) + rhs_pb(id1, id2, id3, j, q) = rhs_pb(id1, id2, id3, j, q) + (3._wp*gam/abscX(j, q))*rhs_mv(id1, id2, id3, j, q)*R_v*Tw + rhs_pb(id1, id2, id3, j, q) = rhs_pb(id1, id2, id3, j, q) + (3._wp*gam/abscX(j, q))*ht(j, q) + rhs_mv(id1, id2, id3, j, q) = rhs_mv(id1, id2, id3, j, q)*(4._wp*pi*abscX(j, q)**2._wp) + end do end if + end do + + ! Compute special high-order moments + momsp(1)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght, 3._wp, 0._wp, 0._wp) + momsp(2)%sf(id1, id2, id3) = 4._wp*pi*nbub*f_quad(abscX, abscY, wght, 2._wp, 1._wp, 0._wp) + momsp(3)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght, 3._wp, 2._wp, 0._wp) + if (abs(gam - 1._wp) <= 1.e-4_wp) then + momsp(4)%sf(id1, id2, id3) = 1._wp else + if (polytropic) then + momsp(4)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght_pb, 3._wp*(1._wp - gam), 0._wp, 3._wp*gam) + pv*f_quad(abscX, abscY, wght, 3._wp, 0._wp, 0._wp) - 4._wp*Re_inv*f_quad(abscX, abscY, wght, 2._wp, 1._wp, 0._wp) - (2._wp/Web)*f_quad(abscX, abscY, wght, 2._wp, 0._wp, 0._wp) + else + momsp(4)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght_pb, 3._wp, 0._wp, 0._wp) - 4._wp*Re_inv*f_quad(abscX, abscY, wght, 2._wp, 1._wp, 0._wp) - (2._wp/Web)*f_quad(abscX, abscY, wght, 2._wp, 0._wp, 0._wp) + end if + end if + else + $:GPU_LOOP(parallelism='[seq]') + do q = 1, nb $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb + do i1 = 0, 2 $:GPU_LOOP(parallelism='[seq]') - do i1 = 0, 2 - $:GPU_LOOP(parallelism='[seq]') - do i2 = 0, 2 - moms3d(i1, i2, q)%sf(id1, id2, id3) = 0._wp - end do + do i2 = 0, 2 + moms3d(i1, i2, q)%sf(id1, id2, id3) = 0._wp end do end do - momsp(1)%sf(id1, id2, id3) = 0._wp - momsp(2)%sf(id1, id2, id3) = 0._wp - momsp(3)%sf(id1, id2, id3) = 0._wp - momsp(4)%sf(id1, id2, id3) = 0._wp - end if - end do + end do + momsp(1)%sf(id1, id2, id3) = 0._wp + momsp(2)%sf(id1, id2, id3) = 0._wp + momsp(3)%sf(id1, id2, id3) = 0._wp + momsp(4)%sf(id1, id2, id3) = 0._wp + end if end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() contains ! Helper to select the correct coefficient routine diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index a9b0e06659..06797bb168 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -564,19 +564,19 @@ contains end do end if ! end allocation of viscous variables - #:call GPU_PARALLEL_LOOP(collapse=4) - do id = 1, num_dims - do i = 1, sys_size - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - flux_gsrc_n(id)%vf(i)%sf(j, k, l) = 0._wp - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l,id]', collapse=4) + do id = 1, num_dims + do i = 1, sys_size + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + flux_gsrc_n(id)%vf(i)%sf(j, k, l) = 0._wp end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if ! end allocation for .not. igr @@ -647,39 +647,39 @@ contains if (.not. igr) then ! Association/Population of Working Variables - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - q_cons_qp%vf(i)%sf(j, k, l) = q_cons_vf(i)%sf(j, k, l) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, sys_size + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + q_cons_qp%vf(i)%sf(j, k, l) = q_cons_vf(i)%sf(j, k, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() ! Converting Conservative to Primitive Variables if (mpp_lim .and. bubbles_euler) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - alf_sum%sf(j, k, l) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - 1 - alf_sum%sf(j, k, l) = alf_sum%sf(j, k, l) + q_cons_qp%vf(i)%sf(j, k, l) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - 1 - q_cons_qp%vf(i)%sf(j, k, l) = q_cons_qp%vf(i)%sf(j, k, l)*(1._wp - q_cons_qp%vf(alf_idx)%sf(j, k, l)) & - /alf_sum%sf(j, k, l) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + alf_sum%sf(j, k, l) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe - 1 + alf_sum%sf(j, k, l) = alf_sum%sf(j, k, l) + q_cons_qp%vf(i)%sf(j, k, l) + end do + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe - 1 + q_cons_qp%vf(i)%sf(j, k, l) = q_cons_qp%vf(i)%sf(j, k, l)*(1._wp - q_cons_qp%vf(alf_idx)%sf(j, k, l)) & + /alf_sum%sf(j, k, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end if @@ -739,17 +739,17 @@ contains if (igr) then if (id == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do l = -1, p + 1 - do k = -1, n + 1 - do j = -1, m + 1 - do i = 1, sys_size - rhs_vf(i)%sf(j, k, l) = 0._wp - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do l = -1, p + 1 + do k = -1, n + 1 + do j = -1, m + 1 + do i = 1, sys_size + rhs_vf(i)%sf(j, k, l) = 0._wp end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if call nvtxStartRange("IGR_RIEMANN") @@ -970,19 +970,19 @@ contains ! END: Dimensional Splitting Loop if (ib) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - if (ib_markers%sf(j, k, l) /= 0) then - do i = 1, sys_size - rhs_vf(i)%sf(j, k, l) = 0._wp - end do - end if - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + if (ib_markers%sf(j, k, l) /= 0) then + do i = 1, sys_size + rhs_vf(i)%sf(j, k, l) = 0._wp + end do + end if end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if ! Additional Physics and Source Temrs @@ -1037,17 +1037,17 @@ contains if (run_time_info .or. probe_wrt .or. ib .or. bubbles_lagrange) then if (.not. igr) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - q_prim_vf(i)%sf(j, k, l) = q_prim_qp%vf(i)%sf(j, k, l) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, sys_size + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + q_prim_vf(i)%sf(j, k, l) = q_prim_qp%vf(i)%sf(j, k, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end if @@ -1079,30 +1079,30 @@ contains real(wp) :: advected_qty_val, pressure_val, velocity_val if (alt_soundspeed) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do q_loop = 0, p - do l_loop = 0, n - do k_loop = 0, m - blkmod1(k_loop, l_loop, q_loop) = ((gammas(1) + 1._wp)*q_prim_vf%vf(E_idx)%sf(k_loop, l_loop, q_loop) + & - pi_infs(1))/gammas(1) - blkmod2(k_loop, l_loop, q_loop) = ((gammas(2) + 1._wp)*q_prim_vf%vf(E_idx)%sf(k_loop, l_loop, q_loop) + & - pi_infs(2))/gammas(2) - alpha1(k_loop, l_loop, q_loop) = q_cons_vf%vf(advxb)%sf(k_loop, l_loop, q_loop) - - if (bubbles_euler) then - alpha2(k_loop, l_loop, q_loop) = q_cons_vf%vf(alf_idx - 1)%sf(k_loop, l_loop, q_loop) - else - alpha2(k_loop, l_loop, q_loop) = q_cons_vf%vf(advxe)%sf(k_loop, l_loop, q_loop) - end if + $:GPU_PARALLEL_LOOP(private='[k_loop,l_loop,q_loop]', collapse=3) + do q_loop = 0, p + do l_loop = 0, n + do k_loop = 0, m + blkmod1(k_loop, l_loop, q_loop) = ((gammas(1) + 1._wp)*q_prim_vf%vf(E_idx)%sf(k_loop, l_loop, q_loop) + & + pi_infs(1))/gammas(1) + blkmod2(k_loop, l_loop, q_loop) = ((gammas(2) + 1._wp)*q_prim_vf%vf(E_idx)%sf(k_loop, l_loop, q_loop) + & + pi_infs(2))/gammas(2) + alpha1(k_loop, l_loop, q_loop) = q_cons_vf%vf(advxb)%sf(k_loop, l_loop, q_loop) + + if (bubbles_euler) then + alpha2(k_loop, l_loop, q_loop) = q_cons_vf%vf(alf_idx - 1)%sf(k_loop, l_loop, q_loop) + else + alpha2(k_loop, l_loop, q_loop) = q_cons_vf%vf(advxe)%sf(k_loop, l_loop, q_loop) + end if - Kterm(k_loop, l_loop, q_loop) = alpha1(k_loop, l_loop, q_loop)*alpha2(k_loop, l_loop, q_loop)* & - (blkmod2(k_loop, l_loop, q_loop) - blkmod1(k_loop, l_loop, q_loop))/ & - (alpha1(k_loop, l_loop, q_loop)*blkmod2(k_loop, l_loop, q_loop) + & - alpha2(k_loop, l_loop, q_loop)*blkmod1(k_loop, l_loop, q_loop)) - end do + Kterm(k_loop, l_loop, q_loop) = alpha1(k_loop, l_loop, q_loop)*alpha2(k_loop, l_loop, q_loop)* & + (blkmod2(k_loop, l_loop, q_loop) - blkmod1(k_loop, l_loop, q_loop))/ & + (alpha1(k_loop, l_loop, q_loop)*blkmod2(k_loop, l_loop, q_loop) + & + alpha2(k_loop, l_loop, q_loop)*blkmod1(k_loop, l_loop, q_loop)) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if select case (idir) @@ -1114,40 +1114,40 @@ contains call s_cbc(q_prim_vf%vf, flux_n(idir)%vf, flux_src_n_vf%vf, idir, 1, irx, iry, irz) end if - #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,flux_face1,flux_face2]') - do j = 1, sys_size - do q_loop = 0, p - do l_loop = 0, n - do k_loop = 0, m - inv_ds = 1._wp/dx(k_loop) - flux_face1 = flux_n(1)%vf(j)%sf(k_loop - 1, l_loop, q_loop) - flux_face2 = flux_n(1)%vf(j)%sf(k_loop, l_loop, q_loop) - rhs_vf(j)%sf(k_loop, l_loop, q_loop) = inv_ds*(flux_face1 - flux_face2) - end do + $:GPU_PARALLEL_LOOP(collapse=4,private='[j,k_loop,l_loop,q_loop,inv_ds,flux_face1,flux_face2]') + do j = 1, sys_size + do q_loop = 0, p + do l_loop = 0, n + do k_loop = 0, m + inv_ds = 1._wp/dx(k_loop) + flux_face1 = flux_n(1)%vf(j)%sf(k_loop - 1, l_loop, q_loop) + flux_face2 = flux_n(1)%vf(j)%sf(k_loop, l_loop, q_loop) + rhs_vf(j)%sf(k_loop, l_loop, q_loop) = inv_ds*(flux_face1 - flux_face2) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (model_eqns == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') - do q_loop = 0, p - do l_loop = 0, n - do k_loop = 0, m - do i_fluid_loop = 1, num_fluids - inv_ds = 1._wp/dx(k_loop) - advected_qty_val = q_cons_vf%vf(i_fluid_loop + advxb - 1)%sf(k_loop, l_loop, q_loop) - pressure_val = q_prim_vf%vf(E_idx)%sf(k_loop, l_loop, q_loop) - flux_face1 = flux_src_n_vf%vf(advxb)%sf(k_loop, l_loop, q_loop) - flux_face2 = flux_src_n_vf%vf(advxb)%sf(k_loop - 1, l_loop, q_loop) - rhs_vf(i_fluid_loop + intxb - 1)%sf(k_loop, l_loop, q_loop) = & - rhs_vf(i_fluid_loop + intxb - 1)%sf(k_loop, l_loop, q_loop) - & - inv_ds*advected_qty_val*pressure_val*(flux_face1 - flux_face2) - end do + $:GPU_PARALLEL_LOOP(collapse=4,private='[i_fluid_loop,k_loop,l_loop,q_loop,inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') + do q_loop = 0, p + do l_loop = 0, n + do k_loop = 0, m + do i_fluid_loop = 1, num_fluids + inv_ds = 1._wp/dx(k_loop) + advected_qty_val = q_cons_vf%vf(i_fluid_loop + advxb - 1)%sf(k_loop, l_loop, q_loop) + pressure_val = q_prim_vf%vf(E_idx)%sf(k_loop, l_loop, q_loop) + flux_face1 = flux_src_n_vf%vf(advxb)%sf(k_loop, l_loop, q_loop) + flux_face2 = flux_src_n_vf%vf(advxb)%sf(k_loop - 1, l_loop, q_loop) + rhs_vf(i_fluid_loop + intxb - 1)%sf(k_loop, l_loop, q_loop) = & + rhs_vf(i_fluid_loop + intxb - 1)%sf(k_loop, l_loop, q_loop) - & + inv_ds*advected_qty_val*pressure_val*(flux_face1 - flux_face2) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if call s_add_directional_advection_source_terms(idir, rhs_vf, q_cons_vf, q_prim_vf, flux_src_n_vf, Kterm) @@ -1160,62 +1160,62 @@ contains call s_cbc(q_prim_vf%vf, flux_n(idir)%vf, flux_src_n_vf%vf, idir, 1, irx, iry, irz) end if - #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,flux_face1,flux_face2]') - do j = 1, sys_size - do l = 0, p - do k = 0, n - do q = 0, m - inv_ds = 1._wp/dy(k) - flux_face1 = flux_n(2)%vf(j)%sf(q, k - 1, l) - flux_face2 = flux_n(2)%vf(j)%sf(q, k, l) - rhs_vf(j)%sf(q, k, l) = rhs_vf(j)%sf(q, k, l) + inv_ds*(flux_face1 - flux_face2) - end do + $:GPU_PARALLEL_LOOP(collapse=4,private='[j,k,l,q,inv_ds,flux_face1,flux_face2]') + do j = 1, sys_size + do l = 0, p + do k = 0, n + do q = 0, m + inv_ds = 1._wp/dy(k) + flux_face1 = flux_n(2)%vf(j)%sf(q, k - 1, l) + flux_face2 = flux_n(2)%vf(j)%sf(q, k, l) + rhs_vf(j)%sf(q, k, l) = rhs_vf(j)%sf(q, k, l) + inv_ds*(flux_face1 - flux_face2) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (model_eqns == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') - do l = 0, p - do k = 0, n - do q = 0, m - do i_fluid_loop = 1, num_fluids - inv_ds = 1._wp/dy(k) - advected_qty_val = q_cons_vf%vf(i_fluid_loop + advxb - 1)%sf(q, k, l) - pressure_val = q_prim_vf%vf(E_idx)%sf(q, k, l) - flux_face1 = flux_src_n_vf%vf(advxb)%sf(q, k, l) - flux_face2 = flux_src_n_vf%vf(advxb)%sf(q, k - 1, l) + $:GPU_PARALLEL_LOOP(collapse=4,private='[i_fluid_loop,k,l,q,inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') + do l = 0, p + do k = 0, n + do q = 0, m + do i_fluid_loop = 1, num_fluids + inv_ds = 1._wp/dy(k) + advected_qty_val = q_cons_vf%vf(i_fluid_loop + advxb - 1)%sf(q, k, l) + pressure_val = q_prim_vf%vf(E_idx)%sf(q, k, l) + flux_face1 = flux_src_n_vf%vf(advxb)%sf(q, k, l) + flux_face2 = flux_src_n_vf%vf(advxb)%sf(q, k - 1, l) + rhs_vf(i_fluid_loop + intxb - 1)%sf(q, k, l) = & + rhs_vf(i_fluid_loop + intxb - 1)%sf(q, k, l) - & + inv_ds*advected_qty_val*pressure_val*(flux_face1 - flux_face2) + if (cyl_coord) then rhs_vf(i_fluid_loop + intxb - 1)%sf(q, k, l) = & rhs_vf(i_fluid_loop + intxb - 1)%sf(q, k, l) - & - inv_ds*advected_qty_val*pressure_val*(flux_face1 - flux_face2) - if (cyl_coord) then - rhs_vf(i_fluid_loop + intxb - 1)%sf(q, k, l) = & - rhs_vf(i_fluid_loop + intxb - 1)%sf(q, k, l) - & - 5.e-1_wp/y_cc(k)*advected_qty_val*pressure_val*(flux_face1 + flux_face2) - end if - end do + 5.e-1_wp/y_cc(k)*advected_qty_val*pressure_val*(flux_face1 + flux_face2) + end if end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (cyl_coord) then - #:call GPU_PARALLEL_LOOP(collapse=4,private='[flux_face1,flux_face2]') - do j = 1, sys_size - do l = 0, p - do k = 0, n - do q = 0, m - flux_face1 = flux_gsrc_n(2)%vf(j)%sf(q, k - 1, l) - flux_face2 = flux_gsrc_n(2)%vf(j)%sf(q, k, l) - rhs_vf(j)%sf(q, k, l) = rhs_vf(j)%sf(q, k, l) - & - 5.e-1_wp/y_cc(k)*(flux_face1 + flux_face2) - end do + $:GPU_PARALLEL_LOOP(collapse=4,private='[j,k,l,q,flux_face1,flux_face2]') + do j = 1, sys_size + do l = 0, p + do k = 0, n + do q = 0, m + flux_face1 = flux_gsrc_n(2)%vf(j)%sf(q, k - 1, l) + flux_face2 = flux_gsrc_n(2)%vf(j)%sf(q, k, l) + rhs_vf(j)%sf(q, k, l) = rhs_vf(j)%sf(q, k, l) - & + 5.e-1_wp/y_cc(k)*(flux_face1 + flux_face2) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if call s_add_directional_advection_source_terms(idir, rhs_vf, q_cons_vf, q_prim_vf, flux_src_n_vf, Kterm) @@ -1229,72 +1229,72 @@ contains end if if (grid_geometry == 3) then ! Cylindrical Coordinates - #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,velocity_val,flux_face1,flux_face2]') - do j = 1, sys_size - do k = 0, p - do q = 0, n - do l = 0, m - inv_ds = 1._wp/(dz(k)*y_cc(q)) - velocity_val = q_prim_vf%vf(contxe + idir)%sf(l, q, k) - flux_face1 = flux_n(3)%vf(j)%sf(l, q, k - 1) - flux_face2 = flux_n(3)%vf(j)%sf(l, q, k) - rhs_vf(j)%sf(l, q, k) = rhs_vf(j)%sf(l, q, k) + & - inv_ds*velocity_val*(flux_face1 - flux_face2) - end do + $:GPU_PARALLEL_LOOP(collapse=4,private='[j,k,l,q,inv_ds,velocity_val,flux_face1,flux_face2]') + do j = 1, sys_size + do k = 0, p + do q = 0, n + do l = 0, m + inv_ds = 1._wp/(dz(k)*y_cc(q)) + velocity_val = q_prim_vf%vf(contxe + idir)%sf(l, q, k) + flux_face1 = flux_n(3)%vf(j)%sf(l, q, k - 1) + flux_face2 = flux_n(3)%vf(j)%sf(l, q, k) + rhs_vf(j)%sf(l, q, k) = rhs_vf(j)%sf(l, q, k) + & + inv_ds*velocity_val*(flux_face1 - flux_face2) end do end do end do - #:endcall GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=4,private='[flux_face1,flux_face2]') - do j = 1, sys_size - do k = 0, p - do q = 0, n - do l = 0, m - flux_face1 = flux_gsrc_n(3)%vf(j)%sf(l, q, k - 1) - flux_face2 = flux_gsrc_n(3)%vf(j)%sf(l, q, k) - rhs_vf(j)%sf(l, q, k) = rhs_vf(j)%sf(l, q, k) - & - 5.e-1_wp/y_cc(q)*(flux_face1 + flux_face2) - end do + end do + $:END_GPU_PARALLEL_LOOP() + $:GPU_PARALLEL_LOOP(collapse=4,private='[j,k,l,q,flux_face1,flux_face2]') + do j = 1, sys_size + do k = 0, p + do q = 0, n + do l = 0, m + flux_face1 = flux_gsrc_n(3)%vf(j)%sf(l, q, k - 1) + flux_face2 = flux_gsrc_n(3)%vf(j)%sf(l, q, k) + rhs_vf(j)%sf(l, q, k) = rhs_vf(j)%sf(l, q, k) - & + 5.e-1_wp/y_cc(q)*(flux_face1 + flux_face2) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() else ! Cartesian Coordinates - #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,flux_face1,flux_face2]') - do j = 1, sys_size - do k = 0, p - do q = 0, n - do l = 0, m - inv_ds = 1._wp/dz(k) - flux_face1 = flux_n(3)%vf(j)%sf(l, q, k - 1) - flux_face2 = flux_n(3)%vf(j)%sf(l, q, k) - rhs_vf(j)%sf(l, q, k) = rhs_vf(j)%sf(l, q, k) + inv_ds*(flux_face1 - flux_face2) - end do + $:GPU_PARALLEL_LOOP(collapse=4,private='[j,k,l,q,inv_ds,flux_face1,flux_face2]') + do j = 1, sys_size + do k = 0, p + do q = 0, n + do l = 0, m + inv_ds = 1._wp/dz(k) + flux_face1 = flux_n(3)%vf(j)%sf(l, q, k - 1) + flux_face2 = flux_n(3)%vf(j)%sf(l, q, k) + rhs_vf(j)%sf(l, q, k) = rhs_vf(j)%sf(l, q, k) + inv_ds*(flux_face1 - flux_face2) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (model_eqns == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') - do k = 0, p - do q = 0, n - do l = 0, m - do i_fluid_loop = 1, num_fluids - inv_ds = 1._wp/dz(k) - advected_qty_val = q_cons_vf%vf(i_fluid_loop + advxb - 1)%sf(l, q, k) - pressure_val = q_prim_vf%vf(E_idx)%sf(l, q, k) - flux_face1 = flux_src_n_vf%vf(advxb)%sf(l, q, k) - flux_face2 = flux_src_n_vf%vf(advxb)%sf(l, q, k - 1) - rhs_vf(i_fluid_loop + intxb - 1)%sf(l, q, k) = & - rhs_vf(i_fluid_loop + intxb - 1)%sf(l, q, k) - & - inv_ds*advected_qty_val*pressure_val*(flux_face1 - flux_face2) - end do + $:GPU_PARALLEL_LOOP(collapse=4,private='[i_fluid_loop,k,l,q,inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') + do k = 0, p + do q = 0, n + do l = 0, m + do i_fluid_loop = 1, num_fluids + inv_ds = 1._wp/dz(k) + advected_qty_val = q_cons_vf%vf(i_fluid_loop + advxb - 1)%sf(l, q, k) + pressure_val = q_prim_vf%vf(E_idx)%sf(l, q, k) + flux_face1 = flux_src_n_vf%vf(advxb)%sf(l, q, k) + flux_face2 = flux_src_n_vf%vf(advxb)%sf(l, q, k - 1) + rhs_vf(i_fluid_loop + intxb - 1)%sf(l, q, k) = & + rhs_vf(i_fluid_loop + intxb - 1)%sf(l, q, k) - & + inv_ds*advected_qty_val*pressure_val*(flux_face1 - flux_face2) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if call s_add_directional_advection_source_terms(idir, rhs_vf, q_cons_vf, q_prim_vf, flux_src_n_vf, Kterm) @@ -1322,136 +1322,136 @@ contains case (1) ! x-direction use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4) if (use_standard_riemann) then - #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') - do j_adv = advxb, advxe - do q_idx = 0, p ! z_extent - do l_idx = 0, n ! y_extent - do k_idx = 0, m ! x_extent - local_inv_ds = 1._wp/dx(k_idx) - local_term_coeff = q_prim_vf_arg%vf(contxe + current_idir)%sf(k_idx, l_idx, q_idx) - local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(k_idx - 1, l_idx, q_idx) - local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(k_idx, l_idx, q_idx) - rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do + $:GPU_PARALLEL_LOOP(collapse=4,private='[j_adv,k_idx,l_idx,q_idx,local_inv_ds, local_term_coeff,local_flux1,local_flux2]') + do j_adv = advxb, advxe + do q_idx = 0, p ! z_extent + do l_idx = 0, n ! y_extent + do k_idx = 0, m ! x_extent + local_inv_ds = 1._wp/dx(k_idx) + local_term_coeff = q_prim_vf_arg%vf(contxe + current_idir)%sf(k_idx, l_idx, q_idx) + local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(k_idx - 1, l_idx, q_idx) + local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(k_idx, l_idx, q_idx) + rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') - do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m - local_inv_ds = 1._wp/dx(k_idx) - local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(k_idx, l_idx, q_idx) - local_k_term_val = Kterm_arg(k_idx, l_idx, q_idx) ! Access is safe due to outer alt_soundspeed check - local_term_coeff = local_q_cons_val - local_k_term_val - local_flux1 = flux_src_n_vf_arg%vf(advxe)%sf(k_idx, l_idx, q_idx) - local_flux2 = flux_src_n_vf_arg%vf(advxe)%sf(k_idx - 1, l_idx, q_idx) - rhs_vf_arg(advxe)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(advxe)%sf(k_idx, l_idx, q_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do - #:endcall GPU_PARALLEL_LOOP - - #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds,local_q_cons_val, local_k_term_val,local_term_coeff, local_flux1, local_flux2]') - do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m - local_inv_ds = 1._wp/dx(k_idx) - local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(k_idx, l_idx, q_idx) - local_k_term_val = Kterm_arg(k_idx, l_idx, q_idx) ! Access is safe - local_term_coeff = local_q_cons_val + local_k_term_val - local_flux1 = flux_src_n_vf_arg%vf(advxb)%sf(k_idx, l_idx, q_idx) - local_flux2 = flux_src_n_vf_arg%vf(advxb)%sf(k_idx - 1, l_idx, q_idx) - rhs_vf_arg(advxb)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(advxb)%sf(k_idx, l_idx, q_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do - #:endcall GPU_PARALLEL_LOOP + $:GPU_PARALLEL_LOOP(collapse=3, private='[k_idx,l_idx,q_idx,local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') + do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m + local_inv_ds = 1._wp/dx(k_idx) + local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(k_idx, l_idx, q_idx) + local_k_term_val = Kterm_arg(k_idx, l_idx, q_idx) ! Access is safe due to outer alt_soundspeed check + local_term_coeff = local_q_cons_val - local_k_term_val + local_flux1 = flux_src_n_vf_arg%vf(advxe)%sf(k_idx, l_idx, q_idx) + local_flux2 = flux_src_n_vf_arg%vf(advxe)%sf(k_idx - 1, l_idx, q_idx) + rhs_vf_arg(advxe)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(advxe)%sf(k_idx, l_idx, q_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(collapse=3, private='[k_idx,l_idx,q_idx,local_inv_ds,local_q_cons_val, local_k_term_val,local_term_coeff, local_flux1, local_flux2]') + do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m + local_inv_ds = 1._wp/dx(k_idx) + local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(k_idx, l_idx, q_idx) + local_k_term_val = Kterm_arg(k_idx, l_idx, q_idx) ! Access is safe + local_term_coeff = local_q_cons_val + local_k_term_val + local_flux1 = flux_src_n_vf_arg%vf(advxb)%sf(k_idx, l_idx, q_idx) + local_flux2 = flux_src_n_vf_arg%vf(advxb)%sf(k_idx - 1, l_idx, q_idx) + rhs_vf_arg(advxb)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(advxb)%sf(k_idx, l_idx, q_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do + $:END_GPU_PARALLEL_LOOP() end if else ! NOT alt_soundspeed - #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') - do j_adv = advxb, advxe - do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m - local_inv_ds = 1._wp/dx(k_idx) - local_term_coeff = q_cons_vf_arg%vf(j_adv)%sf(k_idx, l_idx, q_idx) - local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(k_idx, l_idx, q_idx) - local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(k_idx - 1, l_idx, q_idx) - rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do - end do - #:endcall GPU_PARALLEL_LOOP + $:GPU_PARALLEL_LOOP(collapse=4,private='[j_adv,k_idx,l_idx,q_idx,local_inv_ds, local_term_coeff,local_flux1,local_flux2]') + do j_adv = advxb, advxe + do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m + local_inv_ds = 1._wp/dx(k_idx) + local_term_coeff = q_cons_vf_arg%vf(j_adv)%sf(k_idx, l_idx, q_idx) + local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(k_idx, l_idx, q_idx) + local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(k_idx - 1, l_idx, q_idx) + rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do + end do + $:END_GPU_PARALLEL_LOOP() end if end if case (2) ! y-direction: loops q_idx (x), k_idx (y), l_idx (z); sf(q_idx, k_idx, l_idx); dy(k_idx); Kterm(q_idx,k_idx,l_idx) use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4) if (use_standard_riemann) then - #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') - do j_adv = advxb, advxe - do l_idx = 0, p ! z_extent - do k_idx = 0, n ! y_extent - do q_idx = 0, m ! x_extent - local_inv_ds = 1._wp/dy(k_idx) - local_term_coeff = q_prim_vf_arg%vf(contxe + current_idir)%sf(q_idx, k_idx, l_idx) - local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx - 1, l_idx) - local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx, l_idx) - rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do + $:GPU_PARALLEL_LOOP(collapse=4,private='[j_adv,k_idx,l_idx,q_idx,local_inv_ds, local_term_coeff,local_flux1,local_flux2]') + do j_adv = advxb, advxe + do l_idx = 0, p ! z_extent + do k_idx = 0, n ! y_extent + do q_idx = 0, m ! x_extent + local_inv_ds = 1._wp/dy(k_idx) + local_term_coeff = q_prim_vf_arg%vf(contxe + current_idir)%sf(q_idx, k_idx, l_idx) + local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx - 1, l_idx) + local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx, l_idx) + rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') - do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m - local_inv_ds = 1._wp/dy(k_idx) - local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(q_idx, k_idx, l_idx) - local_k_term_val = Kterm_arg(q_idx, k_idx, l_idx) ! Access is safe - local_term_coeff = local_q_cons_val - local_k_term_val - local_flux1 = flux_src_n_vf_arg%vf(advxe)%sf(q_idx, k_idx, l_idx) - local_flux2 = flux_src_n_vf_arg%vf(advxe)%sf(q_idx, k_idx - 1, l_idx) - rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - if (cyl_coord) then - rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) - & - (local_k_term_val/(2._wp*y_cc(k_idx)))*(local_flux1 + local_flux2) - end if - end do; end do; end do - #:endcall GPU_PARALLEL_LOOP - - #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val,local_term_coeff, local_flux1, local_flux2]') - do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m - local_inv_ds = 1._wp/dy(k_idx) - local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(q_idx, k_idx, l_idx) - local_k_term_val = Kterm_arg(q_idx, k_idx, l_idx) ! Access is safe - local_term_coeff = local_q_cons_val + local_k_term_val - local_flux1 = flux_src_n_vf_arg%vf(advxb)%sf(q_idx, k_idx, l_idx) - local_flux2 = flux_src_n_vf_arg%vf(advxb)%sf(q_idx, k_idx - 1, l_idx) + $:GPU_PARALLEL_LOOP(collapse=3, private='[k_idx,l_idx,q_idx,local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') + do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m + local_inv_ds = 1._wp/dy(k_idx) + local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(q_idx, k_idx, l_idx) + local_k_term_val = Kterm_arg(q_idx, k_idx, l_idx) ! Access is safe + local_term_coeff = local_q_cons_val - local_k_term_val + local_flux1 = flux_src_n_vf_arg%vf(advxe)%sf(q_idx, k_idx, l_idx) + local_flux2 = flux_src_n_vf_arg%vf(advxe)%sf(q_idx, k_idx - 1, l_idx) + rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + if (cyl_coord) then + rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) - & + (local_k_term_val/(2._wp*y_cc(k_idx)))*(local_flux1 + local_flux2) + end if + end do; end do; end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(collapse=3, private='[k_idx,l_idx,q_idx,local_inv_ds, local_q_cons_val, local_k_term_val,local_term_coeff, local_flux1, local_flux2]') + do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m + local_inv_ds = 1._wp/dy(k_idx) + local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(q_idx, k_idx, l_idx) + local_k_term_val = Kterm_arg(q_idx, k_idx, l_idx) ! Access is safe + local_term_coeff = local_q_cons_val + local_k_term_val + local_flux1 = flux_src_n_vf_arg%vf(advxb)%sf(q_idx, k_idx, l_idx) + local_flux2 = flux_src_n_vf_arg%vf(advxb)%sf(q_idx, k_idx - 1, l_idx) + rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + if (cyl_coord) then rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - if (cyl_coord) then - rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) + & - (local_k_term_val/(2._wp*y_cc(k_idx)))*(local_flux1 + local_flux2) - end if - end do; end do; end do - #:endcall GPU_PARALLEL_LOOP + (local_k_term_val/(2._wp*y_cc(k_idx)))*(local_flux1 + local_flux2) + end if + end do; end do; end do + $:END_GPU_PARALLEL_LOOP() end if else ! NOT alt_soundspeed - #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') - do j_adv = advxb, advxe - do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m - local_inv_ds = 1._wp/dy(k_idx) - local_term_coeff = q_cons_vf_arg%vf(j_adv)%sf(q_idx, k_idx, l_idx) - local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx, l_idx) - local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx - 1, l_idx) - rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do - end do - #:endcall GPU_PARALLEL_LOOP + $:GPU_PARALLEL_LOOP(collapse=4,private='[j_adv,k_idx,l_idx,q_idx,local_inv_ds, local_term_coeff,local_flux1,local_flux2]') + do j_adv = advxb, advxe + do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m + local_inv_ds = 1._wp/dy(k_idx) + local_term_coeff = q_cons_vf_arg%vf(j_adv)%sf(q_idx, k_idx, l_idx) + local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx, l_idx) + local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx - 1, l_idx) + rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do + end do + $:END_GPU_PARALLEL_LOOP() end if end if @@ -1463,64 +1463,64 @@ contains end if if (use_standard_riemann) then - #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') - do j_adv = advxb, advxe - do k_idx = 0, p ! z_extent - do q_idx = 0, n ! y_extent - do l_idx = 0, m ! x_extent - local_inv_ds = 1._wp/dz(k_idx) - local_term_coeff = q_prim_vf_arg%vf(contxe + current_idir)%sf(l_idx, q_idx, k_idx) - local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx - 1) - local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx) - rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do + $:GPU_PARALLEL_LOOP(collapse=4,private='[j_adv,k_idx,l_idx,q_idx,local_inv_ds, local_term_coeff,local_flux1,local_flux2]') + do j_adv = advxb, advxe + do k_idx = 0, p ! z_extent + do q_idx = 0, n ! y_extent + do l_idx = 0, m ! x_extent + local_inv_ds = 1._wp/dz(k_idx) + local_term_coeff = q_prim_vf_arg%vf(contxe + current_idir)%sf(l_idx, q_idx, k_idx) + local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx - 1) + local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx) + rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds,local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') - do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m - local_inv_ds = 1._wp/dz(k_idx) - local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx) - local_k_term_val = Kterm_arg(l_idx, q_idx, k_idx) ! Access is safe - local_term_coeff = local_q_cons_val - local_k_term_val - local_flux1 = flux_src_n_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx) - local_flux2 = flux_src_n_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx - 1) - rhs_vf_arg(advxe)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(advxe)%sf(l_idx, q_idx, k_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do - #:endcall GPU_PARALLEL_LOOP - - #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') - do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m - local_inv_ds = 1._wp/dz(k_idx) - local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx) - local_k_term_val = Kterm_arg(l_idx, q_idx, k_idx) ! Access is safe - local_term_coeff = local_q_cons_val + local_k_term_val - local_flux1 = flux_src_n_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx) - local_flux2 = flux_src_n_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx - 1) - rhs_vf_arg(advxb)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(advxb)%sf(l_idx, q_idx, k_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do - #:endcall GPU_PARALLEL_LOOP + $:GPU_PARALLEL_LOOP(collapse=3, private='[k_idx,l_idx,q_idx,local_inv_ds,local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') + do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m + local_inv_ds = 1._wp/dz(k_idx) + local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx) + local_k_term_val = Kterm_arg(l_idx, q_idx, k_idx) ! Access is safe + local_term_coeff = local_q_cons_val - local_k_term_val + local_flux1 = flux_src_n_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx) + local_flux2 = flux_src_n_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx - 1) + rhs_vf_arg(advxe)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(advxe)%sf(l_idx, q_idx, k_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(collapse=3, private='[k_idx,l_idx,q_idx,local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') + do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m + local_inv_ds = 1._wp/dz(k_idx) + local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx) + local_k_term_val = Kterm_arg(l_idx, q_idx, k_idx) ! Access is safe + local_term_coeff = local_q_cons_val + local_k_term_val + local_flux1 = flux_src_n_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx) + local_flux2 = flux_src_n_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx - 1) + rhs_vf_arg(advxb)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(advxb)%sf(l_idx, q_idx, k_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do + $:END_GPU_PARALLEL_LOOP() end if else ! NOT alt_soundspeed - #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') - do j_adv = advxb, advxe - do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m - local_inv_ds = 1._wp/dz(k_idx) - local_term_coeff = q_cons_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx) - local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx) - local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx - 1) - rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do - end do - #:endcall GPU_PARALLEL_LOOP + $:GPU_PARALLEL_LOOP(collapse=4, private='[j_adv,k_idx,l_idx,q_idx,local_inv_ds, local_term_coeff,local_flux1,local_flux2]') + do j_adv = advxb, advxe + do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m + local_inv_ds = 1._wp/dz(k_idx) + local_term_coeff = q_cons_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx) + local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx) + local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx - 1) + rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do + end do + $:END_GPU_PARALLEL_LOOP() end if end if end select @@ -1542,74 +1542,74 @@ contains if (idir == 1) then ! x-direction if (surface_tension) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(c_idx)%sf(j, k, l) = & - rhs_vf(c_idx)%sf(j, k, l) + 1._wp/dx(j)* & - q_prim_vf(c_idx)%sf(j, k, l)* & - (flux_src_n_in(advxb)%sf(j, k, l) - & - flux_src_n_in(advxb)%sf(j - 1, k, l)) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(c_idx)%sf(j, k, l) = & + rhs_vf(c_idx)%sf(j, k, l) + 1._wp/dx(j)* & + q_prim_vf(c_idx)%sf(j, k, l)* & + (flux_src_n_in(advxb)%sf(j, k, l) - & + flux_src_n_in(advxb)%sf(j - 1, k, l)) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if ((surface_tension .or. viscous) .or. chem_params%diffusion) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - if (surface_tension .or. viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dx(j)* & - (flux_src_n_in(i)%sf(j - 1, k, l) & - - flux_src_n_in(i)%sf(j, k, l)) - end do - end if + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + if (surface_tension .or. viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dx(j)* & + (flux_src_n_in(i)%sf(j - 1, k, l) & + - flux_src_n_in(i)%sf(j, k, l)) + end do + end if - if (chem_params%diffusion) then - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dx(j)* & - (flux_src_n_in(i)%sf(j - 1, k, l) & - - flux_src_n_in(i)%sf(j, k, l)) - end do + if (chem_params%diffusion) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dx(j)* & + (flux_src_n_in(i)%sf(j - 1, k, l) & + - flux_src_n_in(i)%sf(j, k, l)) + end do - if (.not. viscous) then - rhs_vf(E_idx)%sf(j, k, l) = & - rhs_vf(E_idx)%sf(j, k, l) + 1._wp/dx(j)* & - (flux_src_n_in(E_idx)%sf(j - 1, k, l) & - - flux_src_n_in(E_idx)%sf(j, k, l)) - end if + if (.not. viscous) then + rhs_vf(E_idx)%sf(j, k, l) = & + rhs_vf(E_idx)%sf(j, k, l) + 1._wp/dx(j)* & + (flux_src_n_in(E_idx)%sf(j - 1, k, l) & + - flux_src_n_in(E_idx)%sf(j, k, l)) end if - end do + end if end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if elseif (idir == 2) then ! y-direction if (surface_tension) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(c_idx)%sf(j, k, l) = & - rhs_vf(c_idx)%sf(j, k, l) + 1._wp/dy(k)* & - q_prim_vf(c_idx)%sf(j, k, l)* & - (flux_src_n_in(advxb)%sf(j, k, l) - & - flux_src_n_in(advxb)%sf(j, k - 1, l)) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(c_idx)%sf(j, k, l) = & + rhs_vf(c_idx)%sf(j, k, l) + 1._wp/dy(k)* & + q_prim_vf(c_idx)%sf(j, k, l)* & + (flux_src_n_in(advxb)%sf(j, k, l) - & + flux_src_n_in(advxb)%sf(j, k - 1, l)) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (cyl_coord .and. ((bc_y%beg == -2) .or. (bc_y%beg == -14))) then @@ -1630,74 +1630,74 @@ contains idwbuff(1), idwbuff(2), idwbuff(3)) end if - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, 0, l) = & - rhs_vf(i)%sf(j, 0, l) + 1._wp/(y_cc(1) - y_cc(-1))* & - (tau_Re_vf(i)%sf(j, -1, l) & - - tau_Re_vf(i)%sf(j, 1, l)) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=2) + do l = 0, p + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, 0, l) = & + rhs_vf(i)%sf(j, 0, l) + 1._wp/(y_cc(1) - y_cc(-1))* & + (tau_Re_vf(i)%sf(j, -1, l) & + - tau_Re_vf(i)%sf(j, 1, l)) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 1, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & - (flux_src_n_in(i)%sf(j, k - 1, l) & - - flux_src_n_in(i)%sf(j, k, l)) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do l = 0, p + do k = 1, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & + (flux_src_n_in(i)%sf(j, k - 1, l) & + - flux_src_n_in(i)%sf(j, k, l)) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() else if ((surface_tension .or. viscous) .or. chem_params%diffusion) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - if (surface_tension .or. viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & - (flux_src_n_in(i)%sf(j, k - 1, l) & - - flux_src_n_in(i)%sf(j, k, l)) - end do - end if + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + if (surface_tension .or. viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & + (flux_src_n_in(i)%sf(j, k - 1, l) & + - flux_src_n_in(i)%sf(j, k, l)) + end do + end if - if (chem_params%diffusion) then - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & - (flux_src_n_in(i)%sf(j, k - 1, l) & - - flux_src_n_in(i)%sf(j, k, l)) - end do - if (.not. viscous) then - rhs_vf(E_idx)%sf(j, k, l) = & - rhs_vf(E_idx)%sf(j, k, l) + 1._wp/dy(k)* & - (flux_src_n_in(E_idx)%sf(j, k - 1, l) & - - flux_src_n_in(E_idx)%sf(j, k, l)) - end if + if (chem_params%diffusion) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & + (flux_src_n_in(i)%sf(j, k - 1, l) & + - flux_src_n_in(i)%sf(j, k, l)) + end do + if (.not. viscous) then + rhs_vf(E_idx)%sf(j, k, l) = & + rhs_vf(E_idx)%sf(j, k, l) + 1._wp/dy(k)* & + (flux_src_n_in(E_idx)%sf(j, k - 1, l) & + - flux_src_n_in(E_idx)%sf(j, k, l)) end if - end do + end if end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end if @@ -1706,128 +1706,128 @@ contains if (cyl_coord) then if ((bc_y%beg == -2) .or. (bc_y%beg == -14)) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 1, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) - 5.e-1_wp/y_cc(k)* & - (flux_src_n_in(i)%sf(j, k - 1, l) & - + flux_src_n_in(i)%sf(j, k, l)) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do l = 0, p + do k = 1, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) - 5.e-1_wp/y_cc(k)* & + (flux_src_n_in(i)%sf(j, k - 1, l) & + + flux_src_n_in(i)%sf(j, k, l)) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (viscous) then - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, 0, l) = & - rhs_vf(i)%sf(j, 0, l) - 1._wp/y_cc(0)* & - tau_Re_vf(i)%sf(j, 0, l) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=2) + do l = 0, p + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, 0, l) = & + rhs_vf(i)%sf(j, 0, l) - 1._wp/y_cc(0)* & + tau_Re_vf(i)%sf(j, 0, l) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if else - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) - 5.e-1_wp/y_cc(k)* & - (flux_src_n_in(i)%sf(j, k - 1, l) & - + flux_src_n_in(i)%sf(j, k, l)) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) - 5.e-1_wp/y_cc(k)* & + (flux_src_n_in(i)%sf(j, k - 1, l) & + + flux_src_n_in(i)%sf(j, k, l)) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end if elseif (idir == 3) then ! z-direction if (surface_tension) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(c_idx)%sf(j, k, l) = & - rhs_vf(c_idx)%sf(j, k, l) + 1._wp/dz(l)* & - q_prim_vf(c_idx)%sf(j, k, l)* & - (flux_src_n_in(advxb)%sf(j, k, l) - & - flux_src_n_in(advxb)%sf(j, k, l - 1)) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(c_idx)%sf(j, k, l) = & + rhs_vf(c_idx)%sf(j, k, l) + 1._wp/dz(l)* & + q_prim_vf(c_idx)%sf(j, k, l)* & + (flux_src_n_in(advxb)%sf(j, k, l) - & + flux_src_n_in(advxb)%sf(j, k, l - 1)) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if ((surface_tension .or. viscous) .or. chem_params%diffusion) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - if (surface_tension .or. viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dz(l)* & - (flux_src_n_in(i)%sf(j, k, l - 1) & - - flux_src_n_in(i)%sf(j, k, l)) - end do - end if + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + if (surface_tension .or. viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dz(l)* & + (flux_src_n_in(i)%sf(j, k, l - 1) & + - flux_src_n_in(i)%sf(j, k, l)) + end do + end if - if (chem_params%diffusion) then - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dz(l)* & - (flux_src_n_in(i)%sf(j, k, l - 1) & - - flux_src_n_in(i)%sf(j, k, l)) - end do - if (.not. viscous) then - rhs_vf(E_idx)%sf(j, k, l) = & - rhs_vf(E_idx)%sf(j, k, l) + 1._wp/dz(l)* & - (flux_src_n_in(E_idx)%sf(j, k, l - 1) & - - flux_src_n_in(E_idx)%sf(j, k, l)) - end if + if (chem_params%diffusion) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dz(l)* & + (flux_src_n_in(i)%sf(j, k, l - 1) & + - flux_src_n_in(i)%sf(j, k, l)) + end do + if (.not. viscous) then + rhs_vf(E_idx)%sf(j, k, l) = & + rhs_vf(E_idx)%sf(j, k, l) + 1._wp/dz(l)* & + (flux_src_n_in(E_idx)%sf(j, k, l - 1) & + - flux_src_n_in(E_idx)%sf(j, k, l)) end if - end do + end if end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (grid_geometry == 3) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(momxb + 1)%sf(j, k, l) = & - rhs_vf(momxb + 1)%sf(j, k, l) + 5.e-1_wp* & - (flux_src_n_in(momxe)%sf(j, k, l - 1) & - + flux_src_n_in(momxe)%sf(j, k, l)) - - rhs_vf(momxe)%sf(j, k, l) = & - rhs_vf(momxe)%sf(j, k, l) - 5.e-1_wp* & - (flux_src_n_in(momxb + 1)%sf(j, k, l - 1) & - + flux_src_n_in(momxb + 1)%sf(j, k, l)) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(momxb + 1)%sf(j, k, l) = & + rhs_vf(momxb + 1)%sf(j, k, l) + 5.e-1_wp* & + (flux_src_n_in(momxe)%sf(j, k, l - 1) & + + flux_src_n_in(momxe)%sf(j, k, l)) + + rhs_vf(momxe)%sf(j, k, l) = & + rhs_vf(momxe)%sf(j, k, l) - 5.e-1_wp* & + (flux_src_n_in(momxb + 1)%sf(j, k, l - 1) & + + flux_src_n_in(momxb + 1)%sf(j, k, l)) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end if @@ -1939,44 +1939,44 @@ contains $:GPU_UPDATE(device='[is1,is2,is3,iv]') if (recon_dir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - vR_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_x(j, k, l, i) = v_vf(i)%sf(j, k, l) + vR_x(j, k, l, i) = v_vf(i)%sf(j, k, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() else if (recon_dir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - vR_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + vR_y(j, k, l, i) = v_vf(i)%sf(k, j, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() else if (recon_dir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - vR_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + vR_z(j, k, l, i) = v_vf(i)%sf(l, k, j) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end if #:endfor diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index bbb190fb1c..6796966d6b 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -358,623 +358,623 @@ contains #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (norm_dir == ${NORM_DIR}$) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, tau_e_L, tau_e_R,G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - end do - - vel_L_rms = 0._wp; vel_R_rms = 0._wp + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, tau_e_L, tau_e_R,G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp - end do + vel_L_rms = 0._wp; vel_R_rms = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp + end do - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do - if (mhd) then - if (n == 0) then ! 1D: constant Bx; By, Bz as variables - B%L(1) = Bx0 - B%R(1) = Bx0 - B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) - B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) - B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) - B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) - else ! 2D/3D: Bx, By, Bz as variables - B%L(1) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) - B%R(1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) - B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) - B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) - B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 2) - B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 2) - end if + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + + if (mhd) then + if (n == 0) then ! 1D: constant Bx; By, Bz as variables + B%L(1) = Bx0 + B%R(1) = Bx0 + B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) + B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) + B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) + B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) + else ! 2D/3D: Bx, By, Bz as variables + B%L(1) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) + B%R(1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) + B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) + B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) + B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 2) + B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 2) end if + end if - rho_L = 0._wp - gamma_L = 0._wp - pi_inf_L = 0._wp - qv_L = 0._wp - - rho_R = 0._wp - gamma_R = 0._wp - pi_inf_R = 0._wp - qv_R = 0._wp + rho_L = 0._wp + gamma_L = 0._wp + pi_inf_L = 0._wp + qv_L = 0._wp - alpha_L_sum = 0._wp - alpha_R_sum = 0._wp + rho_R = 0._wp + gamma_R = 0._wp + pi_inf_R = 0._wp + qv_R = 0._wp - pres_mag%L = 0._wp - pres_mag%R = 0._wp + alpha_L_sum = 0._wp + alpha_R_sum = 0._wp - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = max(0._wp, alpha_rho_L(i)) - alpha_L(i) = min(max(0._wp, alpha_L(i)), 1._wp) - alpha_L_sum = alpha_L_sum + alpha_L(i) - alpha_rho_R(i) = max(0._wp, alpha_rho_R(i)) - alpha_R(i) = min(max(0._wp, alpha_R(i)), 1._wp) - alpha_R_sum = alpha_R_sum + alpha_R(i) - end do - - alpha_L = alpha_L/max(alpha_L_sum, sgm_eps) - alpha_R = alpha_R/max(alpha_R_sum, sgm_eps) - end if + pres_mag%L = 0._wp + pres_mag%R = 0._wp + if (mpp_lim) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) - gamma_L = gamma_L + alpha_L(i)*gammas(i) - pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) - qv_L = qv_L + alpha_rho_L(i)*qvs(i) - - rho_R = rho_R + alpha_rho_R(i) - gamma_R = gamma_R + alpha_R(i)*gammas(i) - pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) - qv_R = qv_R + alpha_rho_R(i)*qvs(i) + alpha_rho_L(i) = max(0._wp, alpha_rho_L(i)) + alpha_L(i) = min(max(0._wp, alpha_L(i)), 1._wp) + alpha_L_sum = alpha_L_sum + alpha_L(i) + alpha_rho_R(i) = max(0._wp, alpha_rho_R(i)) + alpha_R(i) = min(max(0._wp, alpha_R(i)), 1._wp) + alpha_R_sum = alpha_R_sum + alpha_R(i) end do - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real + alpha_L = alpha_L/max(alpha_L_sum, sgm_eps) + alpha_R = alpha_R/max(alpha_R_sum, sgm_eps) + end if - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_L = rho_L + alpha_rho_L(i) + gamma_L = gamma_L + alpha_L(i)*gammas(i) + pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) + qv_L = qv_L + alpha_rho_L(i)*qvs(i) + + rho_R = rho_R + alpha_rho_R(i) + gamma_R = gamma_R + alpha_R(i)*gammas(i) + pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) + qv_R = qv_R + alpha_rho_R(i)*qvs(i) + end do - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) & - + Re_L(i) - Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) & - + Re_R(i) - end do + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_L(i) = dflt_real + Re_R(i) = dflt_real - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) - end do - end if + if (Re_size(i) > 0) Re_L(i) = 0._wp + if (Re_size(i) > 0) Re_R(i) = 0._wp - if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + do q = 1, Re_size(i) + Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) & + + Re_L(i) + Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) & + + Re_R(i) end do - call get_mixture_molecular_weight(Ys_L, MW_L) - call get_mixture_molecular_weight(Ys_R, MW_R) - - Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) - Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) - - R_gas_L = gas_constant/MW_L - R_gas_R = gas_constant/MW_R - T_L = pres_L/rho_L/R_gas_L - T_R = pres_R/rho_R/R_gas_R + Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + end do + end if - call get_species_specific_heats_r(T_L, Cp_iL) - call get_species_specific_heats_r(T_R, Cp_iR) + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - if (chem_params%gamma_method == 1) then - ! gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. - Gamma_iL = Cp_iL/(Cp_iL - 1.0_wp) - Gamma_iR = Cp_iR/(Cp_iR - 1.0_wp) + call get_mixture_molecular_weight(Ys_L, MW_L) + call get_mixture_molecular_weight(Ys_R, MW_R) + + Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) + Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) + + R_gas_L = gas_constant/MW_L + R_gas_R = gas_constant/MW_R + T_L = pres_L/rho_L/R_gas_L + T_R = pres_R/rho_R/R_gas_R + + call get_species_specific_heats_r(T_L, Cp_iL) + call get_species_specific_heats_r(T_R, Cp_iR) + + if (chem_params%gamma_method == 1) then + ! gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. + Gamma_iL = Cp_iL/(Cp_iL - 1.0_wp) + Gamma_iR = Cp_iR/(Cp_iR - 1.0_wp) + + gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) + gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) + else if (chem_params%gamma_method == 2) then + ! gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. + call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) + call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) + call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) + call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) + + Gamm_L = Cp_L/Cv_L + gamma_L = 1.0_wp/(Gamm_L - 1.0_wp) + Gamm_R = Cp_R/Cv_R + gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) + end if - gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) - gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) - else if (chem_params%gamma_method == 2) then - ! gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. - call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) - call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) - call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) - call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) + call get_mixture_energy_mass(T_L, Ys_L, E_L) + call get_mixture_energy_mass(T_R, Ys_R, E_R) + + E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms + E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + elseif (mhd .and. relativity) then + Ga%L = 1._wp/sqrt(1._wp - vel_L_rms) + Ga%R = 1._wp/sqrt(1._wp - vel_R_rms) + vdotB%L = vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3) + vdotB%R = vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3) + + b4%L(1:3) = B%L(1:3)/Ga%L + Ga%L*vel_L(1:3)*vdotB%L + b4%R(1:3) = B%R(1:3)/Ga%R + Ga%R*vel_R(1:3)*vdotB%R + B2%L = B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp + B2%R = B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp + + pres_mag%L = 0.5_wp*(B2%L/Ga%L**2._wp + vdotB%L**2._wp) + pres_mag%R = 0.5_wp*(B2%R/Ga%R**2._wp + vdotB%R**2._wp) + + ! Hard-coded EOS + H_L = 1._wp + (gamma_L + 1)*pres_L/rho_L + H_R = 1._wp + (gamma_R + 1)*pres_R/rho_R + + cm%L(1:3) = (rho_L*H_L*Ga%L**2 + B2%L)*vel_L(1:3) - vdotB%L*B%L(1:3) + cm%R(1:3) = (rho_R*H_R*Ga%R**2 + B2%R)*vel_R(1:3) - vdotB%R*B%R(1:3) + + E_L = rho_L*H_L*Ga%L**2 - pres_L + 0.5_wp*(B2%L + vel_L_rms*B2%L - vdotB%L**2._wp) - rho_L*Ga%L + E_R = rho_R*H_R*Ga%R**2 - pres_R + 0.5_wp*(B2%R + vel_R_rms*B2%R - vdotB%R**2._wp) - rho_R*Ga%R + elseif (mhd .and. .not. relativity) then + pres_mag%L = 0.5_wp*(B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp) + pres_mag%R = 0.5_wp*(B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp) + E_L = gamma_L*pres_L + pi_inf_L + 0.5_wp*rho_L*vel_L_rms + qv_L + pres_mag%L + E_R = gamma_R*pres_R + pi_inf_R + 0.5_wp*rho_R*vel_R_rms + qv_R + pres_mag%R ! includes magnetic energy + H_L = (E_L + pres_L - pres_mag%L)/rho_L + H_R = (E_R + pres_R - pres_mag%R)/rho_R ! stagnation enthalpy here excludes magnetic energy (only used to find speed of sound) + else + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1*rho_L*vel_L_rms + qv_L + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1*rho_R*vel_R_rms + qv_R + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + end if - Gamm_L = Cp_L/Cv_L - gamma_L = 1.0_wp/(Gamm_L - 1.0_wp) - Gamm_R = Cp_R/Cv_R - gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) - end if + ! elastic energy update + if (hypoelasticity) then + G_L = 0._wp; G_R = 0._wp - call get_mixture_energy_mass(T_L, Ys_L, E_L) - call get_mixture_energy_mass(T_R, Ys_R, E_R) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) + end do - E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms - E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R - elseif (mhd .and. relativity) then - Ga%L = 1._wp/sqrt(1._wp - vel_L_rms) - Ga%R = 1._wp/sqrt(1._wp - vel_R_rms) - vdotB%L = vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3) - vdotB%R = vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3) - - b4%L(1:3) = B%L(1:3)/Ga%L + Ga%L*vel_L(1:3)*vdotB%L - b4%R(1:3) = B%R(1:3)/Ga%R + Ga%R*vel_R(1:3)*vdotB%R - B2%L = B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp - B2%R = B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp - - pres_mag%L = 0.5_wp*(B2%L/Ga%L**2._wp + vdotB%L**2._wp) - pres_mag%R = 0.5_wp*(B2%R/Ga%R**2._wp + vdotB%R**2._wp) - - ! Hard-coded EOS - H_L = 1._wp + (gamma_L + 1)*pres_L/rho_L - H_R = 1._wp + (gamma_R + 1)*pres_R/rho_R - - cm%L(1:3) = (rho_L*H_L*Ga%L**2 + B2%L)*vel_L(1:3) - vdotB%L*B%L(1:3) - cm%R(1:3) = (rho_R*H_R*Ga%R**2 + B2%R)*vel_R(1:3) - vdotB%R*B%R(1:3) - - E_L = rho_L*H_L*Ga%L**2 - pres_L + 0.5_wp*(B2%L + vel_L_rms*B2%L - vdotB%L**2._wp) - rho_L*Ga%L - E_R = rho_R*H_R*Ga%R**2 - pres_R + 0.5_wp*(B2%R + vel_R_rms*B2%R - vdotB%R**2._wp) - rho_R*Ga%R - elseif (mhd .and. .not. relativity) then - pres_mag%L = 0.5_wp*(B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp) - pres_mag%R = 0.5_wp*(B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp) - E_L = gamma_L*pres_L + pi_inf_L + 0.5_wp*rho_L*vel_L_rms + qv_L + pres_mag%L - E_R = gamma_R*pres_R + pi_inf_R + 0.5_wp*rho_R*vel_R_rms + qv_R + pres_mag%R ! includes magnetic energy - H_L = (E_L + pres_L - pres_mag%L)/rho_L - H_R = (E_R + pres_R - pres_mag%R)/rho_R ! stagnation enthalpy here excludes magnetic energy (only used to find speed of sound) - else - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1*rho_L*vel_L_rms + qv_L - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1*rho_R*vel_R_rms + qv_R - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R + if (cont_damage) then + G_L = G_L*max((1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) + G_R = G_R*max((1._wp - qR_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) end if - ! elastic energy update - if (hypoelasticity) then - G_L = 0._wp; G_R = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) - end do - - if (cont_damage) then - G_L = G_L*max((1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) - G_R = G_R*max((1._wp - qR_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) - end if - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - ! Elastic contribution to energy if G large enough - !TODO take out if statement if stable without - if ((G_L > 1000) .and. (G_R > 1000)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, strxe - strxb + 1 + tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + ! Elastic contribution to energy if G large enough + !TODO take out if statement if stable without + if ((G_L > 1000) .and. (G_R > 1000)) then + E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) + E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) + ! Double for shear stresses + if (any(strxb - 1 + i == shear_indices)) then E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - ! Double for shear stresses - if (any(strxb - 1 + i == shear_indices)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - end if end if - end do - end if - - ! elastic energy update - !if ( hyperelasticity ) then - ! G_L = 0._wp - ! G_R = 0._wp - ! - ! $:GPU_LOOP(parallelism='[seq]') - ! do i = 1, num_fluids - ! G_L = G_L + alpha_L(i)*Gs_rs(i) - ! G_R = G_R + alpha_R(i)*Gs_rs(i) - ! end do - ! ! Elastic contribution to energy if G large enough - ! if ((G_L > 1.e-3_wp) .and. (G_R > 1.e-3_wp)) then - ! E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) - ! E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) - ! $:GPU_LOOP(parallelism='[seq]') - ! do i = 1, b_size-1 - ! tau_e_L(i) = G_L*qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - ! tau_e_R(i) = G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - ! end do - ! $:GPU_LOOP(parallelism='[seq]') - ! do i = 1, b_size-1 - ! tau_e_L(i) = 0._wp - ! tau_e_R(i) = 0._wp - ! end do - ! $:GPU_LOOP(parallelism='[seq]') - ! do i = 1, num_dims - ! xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) - ! xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) - ! end do - ! end if - !end if - - @:compute_average_state() - - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & - vel_L_rms, 0._wp, c_L) - - call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & - vel_R_rms, 0._wp, c_R) + end if + end do + end if - !> The computation of c_avg does not require all the variables, and therefore the non '_avg' - ! variables are placeholders to call the subroutine. + ! elastic energy update + !if ( hyperelasticity ) then + ! G_L = 0._wp + ! G_R = 0._wp + ! + ! $:GPU_LOOP(parallelism='[seq]') + ! do i = 1, num_fluids + ! G_L = G_L + alpha_L(i)*Gs_rs(i) + ! G_R = G_R + alpha_R(i)*Gs_rs(i) + ! end do + ! ! Elastic contribution to energy if G large enough + ! if ((G_L > 1.e-3_wp) .and. (G_R > 1.e-3_wp)) then + ! E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) + ! E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) + ! $:GPU_LOOP(parallelism='[seq]') + ! do i = 1, b_size-1 + ! tau_e_L(i) = G_L*qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + ! tau_e_R(i) = G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + ! end do + ! $:GPU_LOOP(parallelism='[seq]') + ! do i = 1, b_size-1 + ! tau_e_L(i) = 0._wp + ! tau_e_R(i) = 0._wp + ! end do + ! $:GPU_LOOP(parallelism='[seq]') + ! do i = 1, num_dims + ! xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) + ! xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) + ! end do + ! end if + !end if + + @:compute_average_state() + + call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & + vel_L_rms, 0._wp, c_L) + + call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & + vel_R_rms, 0._wp, c_R) + + !> The computation of c_avg does not require all the variables, and therefore the non '_avg' + ! variables are placeholders to call the subroutine. + + call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & + vel_avg_rms, c_sum_Yi_Phi, c_avg) + + if (mhd) then + call s_compute_fast_magnetosonic_speed(rho_L, c_L, B%L, norm_dir, c_fast%L, H_L) + call s_compute_fast_magnetosonic_speed(rho_R, c_R, B%R, norm_dir, c_fast%R, H_R) + end if - call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & - vel_avg_rms, c_sum_Yi_Phi, c_avg) + if (viscous) then + if (chemistry) then + call compute_viscosity_and_inversion(T_L, Ys_L, T_R, Ys_R, Re_L(1), Re_R(1)) + end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) + end do + end if + if (wave_speeds == 1) then if (mhd) then - call s_compute_fast_magnetosonic_speed(rho_L, c_L, B%L, norm_dir, c_fast%L, H_L) - call s_compute_fast_magnetosonic_speed(rho_R, c_R, B%R, norm_dir, c_fast%R, H_R) + s_L = min(vel_L(dir_idx(1)) - c_fast%L, vel_R(dir_idx(1)) - c_fast%R) + s_R = max(vel_R(dir_idx(1)) + c_fast%R, vel_L(dir_idx(1)) + c_fast%L) + elseif (hypoelasticity) then + s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + & + (((4._wp*G_L)/3._wp) + & + tau_e_L(dir_idx_tau(1)))/rho_L) & + , vel_R(dir_idx(1)) - sqrt(c_R*c_R + & + (((4._wp*G_R)/3._wp) + & + tau_e_R(dir_idx_tau(1)))/rho_R)) + s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + & + (((4._wp*G_R)/3._wp) + & + tau_e_R(dir_idx_tau(1)))/rho_R) & + , vel_L(dir_idx(1)) + sqrt(c_L*c_L + & + (((4._wp*G_L)/3._wp) + & + tau_e_L(dir_idx_tau(1)))/rho_L)) + else if (hyperelasticity) then + s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + (4._wp*G_L/3._wp)/rho_L) & + , vel_R(dir_idx(1)) - sqrt(c_R*c_R + (4._wp*G_R/3._wp)/rho_R)) + s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + (4._wp*G_R/3._wp)/rho_R) & + , vel_L(dir_idx(1)) + sqrt(c_L*c_L + (4._wp*G_L/3._wp)/rho_L)) + else + s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) + s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) end if - if (viscous) then - if (chemistry) then - call compute_viscosity_and_inversion(T_L, Ys_L, T_R, Ys_R, Re_L(1), Re_R(1)) - end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) - end do - end if + s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & + (s_L - vel_L(dir_idx(1))) - & + rho_R*vel_R(dir_idx(1))* & + (s_R - vel_R(dir_idx(1)))) & + /(rho_L*(s_L - vel_L(dir_idx(1))) - & + rho_R*(s_R - vel_R(dir_idx(1)))) + elseif (wave_speeds == 2) then + pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & + (vel_L(dir_idx(1)) - & + vel_R(dir_idx(1)))) + + pres_SR = pres_SL + + Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & + (pres_SL/pres_L - 1._wp)*pres_L/ & + ((pres_L + pi_inf_L/(1._wp + gamma_L))))) + Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & + (pres_SR/pres_R - 1._wp)*pres_R/ & + ((pres_R + pi_inf_R/(1._wp + gamma_R))))) + + s_L = vel_L(dir_idx(1)) - c_L*Ms_L + s_R = vel_R(dir_idx(1)) + c_R*Ms_R + + s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & + (pres_L - pres_R)/ & + (rho_avg*c_avg)) + end if - if (wave_speeds == 1) then - if (mhd) then - s_L = min(vel_L(dir_idx(1)) - c_fast%L, vel_R(dir_idx(1)) - c_fast%R) - s_R = max(vel_R(dir_idx(1)) + c_fast%R, vel_L(dir_idx(1)) + c_fast%L) - elseif (hypoelasticity) then - s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + & - (((4._wp*G_L)/3._wp) + & - tau_e_L(dir_idx_tau(1)))/rho_L) & - , vel_R(dir_idx(1)) - sqrt(c_R*c_R + & - (((4._wp*G_R)/3._wp) + & - tau_e_R(dir_idx_tau(1)))/rho_R)) - s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + & - (((4._wp*G_R)/3._wp) + & - tau_e_R(dir_idx_tau(1)))/rho_R) & - , vel_L(dir_idx(1)) + sqrt(c_L*c_L + & - (((4._wp*G_L)/3._wp) + & - tau_e_L(dir_idx_tau(1)))/rho_L)) - else if (hyperelasticity) then - s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + (4._wp*G_L/3._wp)/rho_L) & - , vel_R(dir_idx(1)) - sqrt(c_R*c_R + (4._wp*G_R/3._wp)/rho_R)) - s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + (4._wp*G_R/3._wp)/rho_R) & - , vel_L(dir_idx(1)) + sqrt(c_L*c_L + (4._wp*G_L/3._wp)/rho_L)) - else - s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) - s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) - end if + s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) - s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & - (s_L - vel_L(dir_idx(1))) - & - rho_R*vel_R(dir_idx(1))* & - (s_R - vel_R(dir_idx(1)))) & - /(rho_L*(s_L - vel_L(dir_idx(1))) - & - rho_R*(s_R - vel_R(dir_idx(1)))) - elseif (wave_speeds == 2) then - pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & - (vel_L(dir_idx(1)) - & - vel_R(dir_idx(1)))) + xi_M = (5.e-1_wp + sign(5.e-1_wp, s_L)) & + + (5.e-1_wp - sign(5.e-1_wp, s_L)) & + *(5.e-1_wp + sign(5.e-1_wp, s_R)) + xi_P = (5.e-1_wp - sign(5.e-1_wp, s_R)) & + + (5.e-1_wp - sign(5.e-1_wp, s_L)) & + *(5.e-1_wp + sign(5.e-1_wp, s_R)) - pres_SR = pres_SL + ! Low Mach correction + if (low_Mach == 1) then + @:compute_low_Mach_correction() + else + pcorr = 0._wp + end if - Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & - (pres_SL/pres_L - 1._wp)*pres_L/ & - ((pres_L + pi_inf_L/(1._wp + gamma_L))))) - Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & - (pres_SR/pres_R - 1._wp)*pres_R/ & - ((pres_R + pi_inf_R/(1._wp + gamma_R))))) + ! Mass + if (.not. relativity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + (s_M*alpha_rho_R(i)*vel_R(norm_dir) & + - s_P*alpha_rho_L(i)*vel_L(norm_dir) & + + s_M*s_P*(alpha_rho_L(i) & + - alpha_rho_R(i))) & + /(s_M - s_P) + end do + elseif (relativity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + (s_M*Ga%R*alpha_rho_R(i)*vel_R(norm_dir) & + - s_P*Ga%L*alpha_rho_L(i)*vel_L(norm_dir) & + + s_M*s_P*(Ga%L*alpha_rho_L(i) & + - Ga%R*alpha_rho_R(i))) & + /(s_M - s_P) + end do + end if - s_L = vel_L(dir_idx(1)) - c_L*Ms_L - s_R = vel_R(dir_idx(1)) + c_R*Ms_R + ! Momentum + if (mhd .and. (.not. relativity)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 3 + ! Flux of rho*v_i in the ${XYZ}$ direction + ! = rho * v_i * v_${XYZ}$ - B_i * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot + flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & + (s_M*(rho_R*vel_R(i)*vel_R(norm_dir) & + - B%R(i)*B%R(norm_dir) & + + dir_flg(i)*(pres_R + pres_mag%R)) & + - s_P*(rho_L*vel_L(i)*vel_L(norm_dir) & + - B%L(i)*B%L(norm_dir) & + + dir_flg(i)*(pres_L + pres_mag%L)) & + + s_M*s_P*(rho_L*vel_L(i) - rho_R*vel_R(i))) & + /(s_M - s_P) + end do + elseif (mhd .and. relativity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 3 + ! Flux of m_i in the ${XYZ}$ direction + ! = m_i * v_${XYZ}$ - b_i/Gamma * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot + flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & + (s_M*(cm%R(i)*vel_R(norm_dir) & + - b4%R(i)/Ga%R*B%R(norm_dir) & + + dir_flg(i)*(pres_R + pres_mag%R)) & + - s_P*(cm%L(i)*vel_L(norm_dir) & + - b4%L(i)/Ga%L*B%L(norm_dir) & + + dir_flg(i)*(pres_L + pres_mag%L)) & + + s_M*s_P*(cm%L(i) - cm%R(i))) & + /(s_M - s_P) + end do + elseif (bubbles_euler) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *vel_R(dir_idx(i)) & + + dir_flg(dir_idx(i))*(pres_R - ptilde_R)) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *vel_L(dir_idx(i)) & + + dir_flg(dir_idx(i))*(pres_L - ptilde_L)) & + + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + - rho_R*vel_R(dir_idx(i)))) & + /(s_M - s_P) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) + end do + else if (hypoelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *vel_R(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_R & + - tau_e_R(dir_idx_tau(i))) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *vel_L(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_L & + - tau_e_L(dir_idx_tau(i))) & + + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + - rho_R*vel_R(dir_idx(i)))) & + /(s_M - s_P) + end do + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *vel_R(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_R) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *vel_L(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_L) & + + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + - rho_R*vel_R(dir_idx(i)))) & + /(s_M - s_P) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) + end do + end if - s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & - (pres_L - pres_R)/ & - (rho_avg*c_avg)) - end if + ! Energy + if (mhd .and. (.not. relativity)) then + ! energy flux = (E + p + p_mag) * v_${XYZ}$ - B_${XYZ}$ * (v_x*B_x + v_y*B_y + v_z*B_z) + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*(vel_R(norm_dir)*(E_R + pres_R + pres_mag%R) - B%R(norm_dir)*(vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3))) & + - s_P*(vel_L(norm_dir)*(E_L + pres_L + pres_mag%L) - B%L(norm_dir)*(vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3))) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) + elseif (mhd .and. relativity) then + ! energy flux = m_${XYZ}$ - mass flux + ! Hard-coded for single-component for now + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*(cm%R(norm_dir) - Ga%R*alpha_rho_R(1)*vel_R(norm_dir)) & + - s_P*(cm%L(norm_dir) - Ga%L*alpha_rho_L(1)*vel_L(norm_dir)) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) + else if (bubbles_euler) then + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) & + - s_P*vel_L(dir_idx(1))*(E_L + pres_L - ptilde_L) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp + else if (hypoelasticity) then + flux_tau_L = 0._wp; flux_tau_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + flux_tau_L = flux_tau_L + tau_e_L(dir_idx_tau(i))*vel_L(dir_idx(i)) + flux_tau_R = flux_tau_R + tau_e_R(dir_idx_tau(i))*vel_R(dir_idx(i)) + end do + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*(vel_R(dir_idx(1))*(E_R + pres_R) - flux_tau_R) & + - s_P*(vel_L(dir_idx(1))*(E_L + pres_L) - flux_tau_L) & + + s_M*s_P*(E_L - E_R))/(s_M - s_P) + else + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*vel_R(dir_idx(1))*(E_R + pres_R) & + - s_P*vel_L(dir_idx(1))*(E_L + pres_L) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp + end if - s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) + ! Elastic Stresses + if (hypoelasticity) then + do i = 1, strxe - strxb + 1 !TODO: this indexing may be slow + flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *tau_e_R(i)) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *tau_e_L(i)) & + + s_M*s_P*(rho_L*tau_e_L(i) & + - rho_R*tau_e_R(i))) & + /(s_M - s_P) + end do + end if - xi_M = (5.e-1_wp + sign(5.e-1_wp, s_L)) & - + (5.e-1_wp - sign(5.e-1_wp, s_L)) & - *(5.e-1_wp + sign(5.e-1_wp, s_R)) - xi_P = (5.e-1_wp - sign(5.e-1_wp, s_R)) & - + (5.e-1_wp - sign(5.e-1_wp, s_L)) & - *(5.e-1_wp + sign(5.e-1_wp, s_R)) + ! Advection + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + (qL_prim_rs${XYZ}$_vf(j, k, l, i) & + - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) & + *s_M*s_P/(s_M - s_P) + flux_src_rs${XYZ}$_vf(j, k, l, i) = & + (s_M*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + - s_P*qL_prim_rs${XYZ}$_vf(j, k, l, i)) & + /(s_M - s_P) + end do - ! Low Mach correction - if (low_Mach == 1) then - @:compute_low_Mach_correction() - else - pcorr = 0._wp + if (bubbles_euler) then + ! From HLLC: Kills mass transport @ bubble gas density + if (num_fluids > 1) then + flux_rs${XYZ}$_vf(j, k, l, contxe) = 0._wp end if + end if - ! Mass - if (.not. relativity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - (s_M*alpha_rho_R(i)*vel_R(norm_dir) & - - s_P*alpha_rho_L(i)*vel_L(norm_dir) & - + s_M*s_P*(alpha_rho_L(i) & - - alpha_rho_R(i))) & - /(s_M - s_P) - end do - elseif (relativity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - (s_M*Ga%R*alpha_rho_R(i)*vel_R(norm_dir) & - - s_P*Ga%L*alpha_rho_L(i)*vel_L(norm_dir) & - + s_M*s_P*(Ga%L*alpha_rho_L(i) & - - Ga%R*alpha_rho_R(i))) & - /(s_M - s_P) - end do - end if + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) + Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + + flux_rs${XYZ}$_vf(j, k, l, i) = (s_M*Y_R*rho_R*vel_R(dir_idx(1)) & + - s_P*Y_L*rho_L*vel_L(dir_idx(1)) & + + s_M*s_P*(Y_L*rho_L - Y_R*rho_R)) & + /(s_M - s_P) + flux_src_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do + end if - ! Momentum - if (mhd .and. (.not. relativity)) then + if (mhd) then + if (n == 0) then ! 1D: d/dx flux only & Bx = Bx0 = const. + ! B_y flux = v_x * B_y - v_y * Bx0 + ! B_z flux = v_x * B_z - v_z * Bx0 $:GPU_LOOP(parallelism='[seq]') - do i = 1, 3 - ! Flux of rho*v_i in the ${XYZ}$ direction - ! = rho * v_i * v_${XYZ}$ - B_i * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot - flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & - (s_M*(rho_R*vel_R(i)*vel_R(norm_dir) & - - B%R(i)*B%R(norm_dir) & - + dir_flg(i)*(pres_R + pres_mag%R)) & - - s_P*(rho_L*vel_L(i)*vel_L(norm_dir) & - - B%L(i)*B%L(norm_dir) & - + dir_flg(i)*(pres_L + pres_mag%L)) & - + s_M*s_P*(rho_L*vel_L(i) - rho_R*vel_R(i))) & - /(s_M - s_P) + do i = 0, 1 + flux_rsx_vf(j, k, l, B_idx%beg + i) = (s_M*(vel_R(1)*B%R(2 + i) - vel_R(2 + i)*Bx0) & + - s_P*(vel_L(1)*B%L(2 + i) - vel_L(2 + i)*Bx0) & + + s_M*s_P*(B%L(2 + i) - B%R(2 + i)))/(s_M - s_P) end do - elseif (mhd .and. relativity) then + else ! 2D/3D: Bx, By, Bz /= const. but zero flux component in the same direction + ! B_x d/d${XYZ}$ flux = (1 - delta(x,${XYZ}$)) * (v_${XYZ}$ * B_x - v_x * B_${XYZ}$) + ! B_y d/d${XYZ}$ flux = (1 - delta(y,${XYZ}$)) * (v_${XYZ}$ * B_y - v_y * B_${XYZ}$) + ! B_z d/d${XYZ}$ flux = (1 - delta(z,${XYZ}$)) * (v_${XYZ}$ * B_z - v_z * B_${XYZ}$) $:GPU_LOOP(parallelism='[seq]') - do i = 1, 3 - ! Flux of m_i in the ${XYZ}$ direction - ! = m_i * v_${XYZ}$ - b_i/Gamma * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot - flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & - (s_M*(cm%R(i)*vel_R(norm_dir) & - - b4%R(i)/Ga%R*B%R(norm_dir) & - + dir_flg(i)*(pres_R + pres_mag%R)) & - - s_P*(cm%L(i)*vel_L(norm_dir) & - - b4%L(i)/Ga%L*B%L(norm_dir) & - + dir_flg(i)*(pres_L + pres_mag%L)) & - + s_M*s_P*(cm%L(i) - cm%R(i))) & - /(s_M - s_P) - end do - elseif (bubbles_euler) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *vel_R(dir_idx(i)) & - + dir_flg(dir_idx(i))*(pres_R - ptilde_R)) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *vel_L(dir_idx(i)) & - + dir_flg(dir_idx(i))*(pres_L - ptilde_L)) & - + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - - rho_R*vel_R(dir_idx(i)))) & - /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) - end do - else if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *vel_R(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_R & - - tau_e_R(dir_idx_tau(i))) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *vel_L(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_L & - - tau_e_L(dir_idx_tau(i))) & - + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - - rho_R*vel_R(dir_idx(i)))) & - /(s_M - s_P) - end do - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *vel_R(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_R) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *vel_L(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_L) & - + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - - rho_R*vel_R(dir_idx(i)))) & - /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) + do i = 0, 2 + flux_rs${XYZ}$_vf(j, k, l, B_idx%beg + i) = (1 - dir_flg(i + 1))*( & + s_M*(vel_R(dir_idx(1))*B%R(i + 1) - vel_R(i + 1)*B%R(norm_dir)) - & + s_P*(vel_L(dir_idx(1))*B%L(i + 1) - vel_L(i + 1)*B%L(norm_dir)) + & + s_M*s_P*(B%L(i + 1) - B%R(i + 1)))/(s_M - s_P) end do end if + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = 0._wp + end if - ! Energy - if (mhd .and. (.not. relativity)) then - ! energy flux = (E + p + p_mag) * v_${XYZ}$ - B_${XYZ}$ * (v_x*B_x + v_y*B_y + v_z*B_z) - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*(vel_R(norm_dir)*(E_R + pres_R + pres_mag%R) - B%R(norm_dir)*(vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3))) & - - s_P*(vel_L(norm_dir)*(E_L + pres_L + pres_mag%L) - B%L(norm_dir)*(vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3))) & - + s_M*s_P*(E_L - E_R)) & - /(s_M - s_P) - elseif (mhd .and. relativity) then - ! energy flux = m_${XYZ}$ - mass flux - ! Hard-coded for single-component for now - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*(cm%R(norm_dir) - Ga%R*alpha_rho_R(1)*vel_R(norm_dir)) & - - s_P*(cm%L(norm_dir) - Ga%L*alpha_rho_L(1)*vel_L(norm_dir)) & - + s_M*s_P*(E_L - E_R)) & - /(s_M - s_P) - else if (bubbles_euler) then - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) & - - s_P*vel_L(dir_idx(1))*(E_L + pres_L - ptilde_L) & - + s_M*s_P*(E_L - E_R)) & - /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp - else if (hypoelasticity) then - flux_tau_L = 0._wp; flux_tau_R = 0._wp + #:if (NORM_DIR == 2) + if (cyl_coord) then + !Substituting the advective flux into the inviscid geometrical source flux $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_tau_L = flux_tau_L + tau_e_L(dir_idx_tau(i))*vel_L(dir_idx(i)) - flux_tau_R = flux_tau_R + tau_e_R(dir_idx_tau(i))*vel_R(dir_idx(i)) + do i = 1, E_idx + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*(vel_R(dir_idx(1))*(E_R + pres_R) - flux_tau_R) & - - s_P*(vel_L(dir_idx(1))*(E_L + pres_L) - flux_tau_L) & - + s_M*s_P*(E_L - E_R))/(s_M - s_P) - else - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*vel_R(dir_idx(1))*(E_R + pres_R) & - - s_P*vel_L(dir_idx(1))*(E_L + pres_L) & - + s_M*s_P*(E_L - E_R)) & - /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp - end if - - ! Elastic Stresses - if (hypoelasticity) then - do i = 1, strxe - strxb + 1 !TODO: this indexing may be slow - flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *tau_e_R(i)) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *tau_e_L(i)) & - + s_M*s_P*(rho_L*tau_e_L(i) & - - rho_R*tau_e_R(i))) & - /(s_M - s_P) + ! Recalculating the radial momentum geometric source flux + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & + flux_rs${XYZ}$_vf(j, k, l, contxe + 2) & + - (s_M*pres_R - s_P*pres_L)/(s_M - s_P) + ! Geometrical source of the void fraction(s) is zero + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do end if - ! Advection - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - (qL_prim_rs${XYZ}$_vf(j, k, l, i) & - - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) & - *s_M*s_P/(s_M - s_P) - flux_src_rs${XYZ}$_vf(j, k, l, i) = & - (s_M*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - - s_P*qL_prim_rs${XYZ}$_vf(j, k, l, i)) & + if (cyl_coord .and. hypoelasticity) then + ! += tau_sigmasigma using HLL + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) + & + (s_M*tau_e_R(4) - s_P*tau_e_L(4)) & /(s_M - s_P) - end do - - if (bubbles_euler) then - ! From HLLC: Kills mass transport @ bubble gas density - if (num_fluids > 1) then - flux_rs${XYZ}$_vf(j, k, l, contxe) = 0._wp - end if - end if - if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - - flux_rs${XYZ}$_vf(j, k, l, i) = (s_M*Y_R*rho_R*vel_R(dir_idx(1)) & - - s_P*Y_L*rho_L*vel_L(dir_idx(1)) & - + s_M*s_P*(Y_L*rho_L - Y_R*rho_R)) & - /(s_M - s_P) - flux_src_rs${XYZ}$_vf(j, k, l, i) = 0._wp + do i = strxb, strxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do end if + #:endif - if (mhd) then - if (n == 0) then ! 1D: d/dx flux only & Bx = Bx0 = const. - ! B_y flux = v_x * B_y - v_y * Bx0 - ! B_z flux = v_x * B_z - v_z * Bx0 - $:GPU_LOOP(parallelism='[seq]') - do i = 0, 1 - flux_rsx_vf(j, k, l, B_idx%beg + i) = (s_M*(vel_R(1)*B%R(2 + i) - vel_R(2 + i)*Bx0) & - - s_P*(vel_L(1)*B%L(2 + i) - vel_L(2 + i)*Bx0) & - + s_M*s_P*(B%L(2 + i) - B%R(2 + i)))/(s_M - s_P) - end do - else ! 2D/3D: Bx, By, Bz /= const. but zero flux component in the same direction - ! B_x d/d${XYZ}$ flux = (1 - delta(x,${XYZ}$)) * (v_${XYZ}$ * B_x - v_x * B_${XYZ}$) - ! B_y d/d${XYZ}$ flux = (1 - delta(y,${XYZ}$)) * (v_${XYZ}$ * B_y - v_y * B_${XYZ}$) - ! B_z d/d${XYZ}$ flux = (1 - delta(z,${XYZ}$)) * (v_${XYZ}$ * B_z - v_z * B_${XYZ}$) - $:GPU_LOOP(parallelism='[seq]') - do i = 0, 2 - flux_rs${XYZ}$_vf(j, k, l, B_idx%beg + i) = (1 - dir_flg(i + 1))*( & - s_M*(vel_R(dir_idx(1))*B%R(i + 1) - vel_R(i + 1)*B%R(norm_dir)) - & - s_P*(vel_L(dir_idx(1))*B%L(i + 1) - vel_L(i + 1)*B%L(norm_dir)) + & - s_M*s_P*(B%L(i + 1) - B%R(i + 1)))/(s_M - s_P) - end do - end if - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = 0._wp - end if - - #:if (NORM_DIR == 2) - if (cyl_coord) then - !Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP(parallelism='[seq]') - do i = 1, E_idx - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & - flux_rs${XYZ}$_vf(j, k, l, contxe + 2) & - - (s_M*pres_R - s_P*pres_L)/(s_M - s_P) - ! Geometrical source of the void fraction(s) is zero - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - end if - - if (cyl_coord .and. hypoelasticity) then - ! += tau_sigmasigma using HLL - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) + & - (s_M*tau_e_R(4) - s_P*tau_e_L(4)) & - /(s_M - s_P) - - $:GPU_LOOP(parallelism='[seq]') - do i = strxb, strxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - end if - #:endif - - end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if #:endfor @@ -1118,721 +1118,721 @@ contains #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (norm_dir == ${NORM_DIR}$) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, tau_e_L, tau_e_R, G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp, vel_grad_L, vel_grad_R, idx_right_phys]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - end do - - vel_L_rms = 0._wp; vel_R_rms = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp - end do - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do - - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - - if (mhd) then - if (n == 0) then ! 1D: constant Bx; By, Bz as variables - B%L(1) = Bx0 - B%R(1) = Bx0 - B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) - B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) - B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) - B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) - else ! 2D/3D: Bx, By, Bz as variables - B%L(1) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) - B%R(1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) - B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) - B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) - B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 2) - B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 2) - end if - end if + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, tau_e_L, tau_e_R, G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp, vel_grad_L, vel_grad_R, idx_right_phys]') + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - rho_L = 0._wp - gamma_L = 0._wp - pi_inf_L = 0._wp - qv_L = 0._wp + vel_L_rms = 0._wp; vel_R_rms = 0._wp - rho_R = 0._wp - gamma_R = 0._wp - pi_inf_R = 0._wp - qv_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp + end do - alpha_L_sum = 0._wp - alpha_R_sum = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do - pres_mag%L = 0._wp - pres_mag%R = 0._wp + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + + if (mhd) then + if (n == 0) then ! 1D: constant Bx; By, Bz as variables + B%L(1) = Bx0 + B%R(1) = Bx0 + B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) + B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) + B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) + B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) + else ! 2D/3D: Bx, By, Bz as variables + B%L(1) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) + B%R(1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) + B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) + B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) + B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 2) + B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 2) + end if + end if - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = max(0._wp, alpha_rho_L(i)) - alpha_L(i) = min(max(0._wp, alpha_L(i)), 1._wp) - alpha_L_sum = alpha_L_sum + alpha_L(i) - alpha_rho_R(i) = max(0._wp, alpha_rho_R(i)) - alpha_R(i) = min(max(0._wp, alpha_R(i)), 1._wp) - alpha_R_sum = alpha_R_sum + alpha_R(i) - end do + rho_L = 0._wp + gamma_L = 0._wp + pi_inf_L = 0._wp + qv_L = 0._wp - alpha_L = alpha_L/max(alpha_L_sum, sgm_eps) - alpha_R = alpha_R/max(alpha_R_sum, sgm_eps) - end if + rho_R = 0._wp + gamma_R = 0._wp + pi_inf_R = 0._wp + qv_R = 0._wp + + alpha_L_sum = 0._wp + alpha_R_sum = 0._wp + pres_mag%L = 0._wp + pres_mag%R = 0._wp + + if (mpp_lim) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) - gamma_L = gamma_L + alpha_L(i)*gammas(i) - pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) - qv_L = qv_L + alpha_rho_L(i)*qvs(i) - - rho_R = rho_R + alpha_rho_R(i) - gamma_R = gamma_R + alpha_R(i)*gammas(i) - pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) - qv_R = qv_R + alpha_rho_R(i)*qvs(i) + alpha_rho_L(i) = max(0._wp, alpha_rho_L(i)) + alpha_L(i) = min(max(0._wp, alpha_L(i)), 1._wp) + alpha_L_sum = alpha_L_sum + alpha_L(i) + alpha_rho_R(i) = max(0._wp, alpha_rho_R(i)) + alpha_R(i) = min(max(0._wp, alpha_R(i)), 1._wp) + alpha_R_sum = alpha_R_sum + alpha_R(i) end do - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real + alpha_L = alpha_L/max(alpha_L_sum, sgm_eps) + alpha_R = alpha_R/max(alpha_R_sum, sgm_eps) + end if - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_L = rho_L + alpha_rho_L(i) + gamma_L = gamma_L + alpha_L(i)*gammas(i) + pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) + qv_L = qv_L + alpha_rho_L(i)*qvs(i) + + rho_R = rho_R + alpha_rho_R(i) + gamma_R = gamma_R + alpha_R(i)*gammas(i) + pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) + qv_R = qv_R + alpha_rho_R(i)*qvs(i) + end do - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) & - + Re_L(i) - Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) & - + Re_R(i) - end do + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_L(i) = dflt_real + Re_R(i) = dflt_real - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) - end do - end if + if (Re_size(i) > 0) Re_L(i) = 0._wp + if (Re_size(i) > 0) Re_R(i) = 0._wp - if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + do q = 1, Re_size(i) + Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) & + + Re_L(i) + Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) & + + Re_R(i) end do - call get_mixture_molecular_weight(Ys_L, MW_L) - call get_mixture_molecular_weight(Ys_R, MW_R) - - Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) - Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) - - R_gas_L = gas_constant/MW_L - R_gas_R = gas_constant/MW_R - T_L = pres_L/rho_L/R_gas_L - T_R = pres_R/rho_R/R_gas_R + Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + end do + end if - call get_species_specific_heats_r(T_L, Cp_iL) - call get_species_specific_heats_r(T_R, Cp_iR) + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - if (chem_params%gamma_method == 1) then - ! gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. - Gamma_iL = Cp_iL/(Cp_iL - 1.0_wp) - Gamma_iR = Cp_iR/(Cp_iR - 1.0_wp) + call get_mixture_molecular_weight(Ys_L, MW_L) + call get_mixture_molecular_weight(Ys_R, MW_R) + + Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) + Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) + + R_gas_L = gas_constant/MW_L + R_gas_R = gas_constant/MW_R + T_L = pres_L/rho_L/R_gas_L + T_R = pres_R/rho_R/R_gas_R + + call get_species_specific_heats_r(T_L, Cp_iL) + call get_species_specific_heats_r(T_R, Cp_iR) + + if (chem_params%gamma_method == 1) then + ! gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. + Gamma_iL = Cp_iL/(Cp_iL - 1.0_wp) + Gamma_iR = Cp_iR/(Cp_iR - 1.0_wp) + + gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) + gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) + else if (chem_params%gamma_method == 2) then + ! gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. + call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) + call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) + call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) + call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) + + Gamm_L = Cp_L/Cv_L + gamma_L = 1.0_wp/(Gamm_L - 1.0_wp) + Gamm_R = Cp_R/Cv_R + gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) + end if - gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) - gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) - else if (chem_params%gamma_method == 2) then - ! gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. - call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) - call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) - call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) - call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) + call get_mixture_energy_mass(T_L, Ys_L, E_L) + call get_mixture_energy_mass(T_R, Ys_R, E_R) + + E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms + E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + elseif (mhd .and. relativity) then + Ga%L = 1._wp/sqrt(1._wp - vel_L_rms) + Ga%R = 1._wp/sqrt(1._wp - vel_R_rms) + vdotB%L = vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3) + vdotB%R = vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3) + + b4%L(1:3) = B%L(1:3)/Ga%L + Ga%L*vel_L(1:3)*vdotB%L + b4%R(1:3) = B%R(1:3)/Ga%R + Ga%R*vel_R(1:3)*vdotB%R + B2%L = B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp + B2%R = B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp + + pres_mag%L = 0.5_wp*(B2%L/Ga%L**2._wp + vdotB%L**2._wp) + pres_mag%R = 0.5_wp*(B2%R/Ga%R**2._wp + vdotB%R**2._wp) + + ! Hard-coded EOS + H_L = 1._wp + (gamma_L + 1)*pres_L/rho_L + H_R = 1._wp + (gamma_R + 1)*pres_R/rho_R + + cm%L(1:3) = (rho_L*H_L*Ga%L**2 + B2%L)*vel_L(1:3) - vdotB%L*B%L(1:3) + cm%R(1:3) = (rho_R*H_R*Ga%R**2 + B2%R)*vel_R(1:3) - vdotB%R*B%R(1:3) + + E_L = rho_L*H_L*Ga%L**2 - pres_L + 0.5_wp*(B2%L + vel_L_rms*B2%L - vdotB%L**2._wp) - rho_L*Ga%L + E_R = rho_R*H_R*Ga%R**2 - pres_R + 0.5_wp*(B2%R + vel_R_rms*B2%R - vdotB%R**2._wp) - rho_R*Ga%R + elseif (mhd .and. .not. relativity) then + pres_mag%L = 0.5_wp*(B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp) + pres_mag%R = 0.5_wp*(B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp) + E_L = gamma_L*pres_L + pi_inf_L + 0.5_wp*rho_L*vel_L_rms + qv_L + pres_mag%L + E_R = gamma_R*pres_R + pi_inf_R + 0.5_wp*rho_R*vel_R_rms + qv_R + pres_mag%R ! includes magnetic energy + H_L = (E_L + pres_L - pres_mag%L)/rho_L + H_R = (E_R + pres_R - pres_mag%R)/rho_R ! stagnation enthalpy here excludes magnetic energy (only used to find speed of sound) + else + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1*rho_L*vel_L_rms + qv_L + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1*rho_R*vel_R_rms + qv_R + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + end if - Gamm_L = Cp_L/Cv_L - gamma_L = 1.0_wp/(Gamm_L - 1.0_wp) - Gamm_R = Cp_R/Cv_R - gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) - end if + ! elastic energy update + if (hypoelasticity) then + G_L = 0._wp; G_R = 0._wp - call get_mixture_energy_mass(T_L, Ys_L, E_L) - call get_mixture_energy_mass(T_R, Ys_R, E_R) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) + end do - E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms - E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R - elseif (mhd .and. relativity) then - Ga%L = 1._wp/sqrt(1._wp - vel_L_rms) - Ga%R = 1._wp/sqrt(1._wp - vel_R_rms) - vdotB%L = vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3) - vdotB%R = vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3) - - b4%L(1:3) = B%L(1:3)/Ga%L + Ga%L*vel_L(1:3)*vdotB%L - b4%R(1:3) = B%R(1:3)/Ga%R + Ga%R*vel_R(1:3)*vdotB%R - B2%L = B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp - B2%R = B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp - - pres_mag%L = 0.5_wp*(B2%L/Ga%L**2._wp + vdotB%L**2._wp) - pres_mag%R = 0.5_wp*(B2%R/Ga%R**2._wp + vdotB%R**2._wp) - - ! Hard-coded EOS - H_L = 1._wp + (gamma_L + 1)*pres_L/rho_L - H_R = 1._wp + (gamma_R + 1)*pres_R/rho_R - - cm%L(1:3) = (rho_L*H_L*Ga%L**2 + B2%L)*vel_L(1:3) - vdotB%L*B%L(1:3) - cm%R(1:3) = (rho_R*H_R*Ga%R**2 + B2%R)*vel_R(1:3) - vdotB%R*B%R(1:3) - - E_L = rho_L*H_L*Ga%L**2 - pres_L + 0.5_wp*(B2%L + vel_L_rms*B2%L - vdotB%L**2._wp) - rho_L*Ga%L - E_R = rho_R*H_R*Ga%R**2 - pres_R + 0.5_wp*(B2%R + vel_R_rms*B2%R - vdotB%R**2._wp) - rho_R*Ga%R - elseif (mhd .and. .not. relativity) then - pres_mag%L = 0.5_wp*(B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp) - pres_mag%R = 0.5_wp*(B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp) - E_L = gamma_L*pres_L + pi_inf_L + 0.5_wp*rho_L*vel_L_rms + qv_L + pres_mag%L - E_R = gamma_R*pres_R + pi_inf_R + 0.5_wp*rho_R*vel_R_rms + qv_R + pres_mag%R ! includes magnetic energy - H_L = (E_L + pres_L - pres_mag%L)/rho_L - H_R = (E_R + pres_R - pres_mag%R)/rho_R ! stagnation enthalpy here excludes magnetic energy (only used to find speed of sound) - else - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1*rho_L*vel_L_rms + qv_L - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1*rho_R*vel_R_rms + qv_R - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R + if (cont_damage) then + G_L = G_L*max((1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) + G_R = G_R*max((1._wp - qR_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) end if - ! elastic energy update - if (hypoelasticity) then - G_L = 0._wp; G_R = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) - end do - - if (cont_damage) then - G_L = G_L*max((1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) - G_R = G_R*max((1._wp - qR_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) - end if - - do i = 1, strxe - strxb + 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - ! Elastic contribution to energy if G large enough - !TODO take out if statement if stable without - if ((G_L > 1000) .and. (G_R > 1000)) then + do i = 1, strxe - strxb + 1 + tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + ! Elastic contribution to energy if G large enough + !TODO take out if statement if stable without + if ((G_L > 1000) .and. (G_R > 1000)) then + E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) + E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) + ! Double for shear stresses + if (any(strxb - 1 + i == shear_indices)) then E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - ! Double for shear stresses - if (any(strxb - 1 + i == shear_indices)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - end if end if - end do - end if - - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & - vel_L_rms, 0._wp, c_L) + end if + end do + end if - call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & - vel_R_rms, 0._wp, c_R) + call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & + vel_L_rms, 0._wp, c_L) - if (mhd) then - call s_compute_fast_magnetosonic_speed(rho_L, c_L, B%L, norm_dir, c_fast%L, H_L) - call s_compute_fast_magnetosonic_speed(rho_R, c_R, B%R, norm_dir, c_fast%R, H_R) - end if + call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & + vel_R_rms, 0._wp, c_R) - s_L = 0._wp; s_R = 0._wp + if (mhd) then + call s_compute_fast_magnetosonic_speed(rho_L, c_L, B%L, norm_dir, c_fast%L, H_L) + call s_compute_fast_magnetosonic_speed(rho_R, c_R, B%R, norm_dir, c_fast%R, H_R) + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - s_L = s_L + vel_L(i)**2._wp - s_R = s_R + vel_R(i)**2._wp - end do + s_L = 0._wp; s_R = 0._wp - s_L = sqrt(s_L) - s_R = sqrt(s_R) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + s_L = s_L + vel_L(i)**2._wp + s_R = s_R + vel_R(i)**2._wp + end do - s_P = max(s_L, s_R) + max(c_L, c_R) - s_M = -s_P + s_L = sqrt(s_L) + s_R = sqrt(s_R) - s_L = s_M - s_R = s_P + s_P = max(s_L, s_R) + max(c_L, c_R) + s_M = -s_P - ! Low Mach correction - if (low_Mach == 1) then - @:compute_low_Mach_correction() - else - pcorr = 0._wp - end if + s_L = s_M + s_R = s_P - ! Mass - if (.not. relativity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - (s_M*alpha_rho_R(i)*vel_R(norm_dir) & - - s_P*alpha_rho_L(i)*vel_L(norm_dir) & - + s_M*s_P*(alpha_rho_L(i) & - - alpha_rho_R(i))) & - /(s_M - s_P) - end do - elseif (relativity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - (s_M*Ga%R*alpha_rho_R(i)*vel_R(norm_dir) & - - s_P*Ga%L*alpha_rho_L(i)*vel_L(norm_dir) & - + s_M*s_P*(Ga%L*alpha_rho_L(i) & - - Ga%R*alpha_rho_R(i))) & - /(s_M - s_P) - end do - end if + ! Low Mach correction + if (low_Mach == 1) then + @:compute_low_Mach_correction() + else + pcorr = 0._wp + end if - ! Momentum - if (mhd .and. (.not. relativity)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 3 - ! Flux of rho*v_i in the ${XYZ}$ direction - ! = rho * v_i * v_${XYZ}$ - B_i * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot - flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & - (s_M*(rho_R*vel_R(i)*vel_R(norm_dir) & - - B%R(i)*B%R(norm_dir) & - + dir_flg(i)*(pres_R + pres_mag%R)) & - - s_P*(rho_L*vel_L(i)*vel_L(norm_dir) & - - B%L(i)*B%L(norm_dir) & - + dir_flg(i)*(pres_L + pres_mag%L)) & - + s_M*s_P*(rho_L*vel_L(i) - rho_R*vel_R(i))) & - /(s_M - s_P) - end do - elseif (mhd .and. relativity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 3 - ! Flux of m_i in the ${XYZ}$ direction - ! = m_i * v_${XYZ}$ - b_i/Gamma * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot - flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & - (s_M*(cm%R(i)*vel_R(norm_dir) & - - b4%R(i)/Ga%R*B%R(norm_dir) & - + dir_flg(i)*(pres_R + pres_mag%R)) & - - s_P*(cm%L(i)*vel_L(norm_dir) & - - b4%L(i)/Ga%L*B%L(norm_dir) & - + dir_flg(i)*(pres_L + pres_mag%L)) & - + s_M*s_P*(cm%L(i) - cm%R(i))) & - /(s_M - s_P) - end do - elseif (bubbles_euler) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *vel_R(dir_idx(i)) & - + dir_flg(dir_idx(i))*(pres_R - ptilde_R)) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *vel_L(dir_idx(i)) & - + dir_flg(dir_idx(i))*(pres_L - ptilde_L)) & - + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - - rho_R*vel_R(dir_idx(i)))) & - /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) - end do - else if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *vel_R(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_R & - - tau_e_R(dir_idx_tau(i))) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *vel_L(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_L & - - tau_e_L(dir_idx_tau(i))) & - + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - - rho_R*vel_R(dir_idx(i)))) & - /(s_M - s_P) - end do - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *vel_R(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_R) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *vel_L(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_L) & - + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - - rho_R*vel_R(dir_idx(i)))) & - /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) - end do - end if + ! Mass + if (.not. relativity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + (s_M*alpha_rho_R(i)*vel_R(norm_dir) & + - s_P*alpha_rho_L(i)*vel_L(norm_dir) & + + s_M*s_P*(alpha_rho_L(i) & + - alpha_rho_R(i))) & + /(s_M - s_P) + end do + elseif (relativity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + (s_M*Ga%R*alpha_rho_R(i)*vel_R(norm_dir) & + - s_P*Ga%L*alpha_rho_L(i)*vel_L(norm_dir) & + + s_M*s_P*(Ga%L*alpha_rho_L(i) & + - Ga%R*alpha_rho_R(i))) & + /(s_M - s_P) + end do + end if - ! Energy - if (mhd .and. (.not. relativity)) then - ! energy flux = (E + p + p_mag) * v_${XYZ}$ - B_${XYZ}$ * (v_x*B_x + v_y*B_y + v_z*B_z) - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*(vel_R(norm_dir)*(E_R + pres_R + pres_mag%R) - B%R(norm_dir)*(vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3))) & - - s_P*(vel_L(norm_dir)*(E_L + pres_L + pres_mag%L) - B%L(norm_dir)*(vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3))) & - + s_M*s_P*(E_L - E_R)) & + ! Momentum + if (mhd .and. (.not. relativity)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 3 + ! Flux of rho*v_i in the ${XYZ}$ direction + ! = rho * v_i * v_${XYZ}$ - B_i * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot + flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & + (s_M*(rho_R*vel_R(i)*vel_R(norm_dir) & + - B%R(i)*B%R(norm_dir) & + + dir_flg(i)*(pres_R + pres_mag%R)) & + - s_P*(rho_L*vel_L(i)*vel_L(norm_dir) & + - B%L(i)*B%L(norm_dir) & + + dir_flg(i)*(pres_L + pres_mag%L)) & + + s_M*s_P*(rho_L*vel_L(i) - rho_R*vel_R(i))) & /(s_M - s_P) - elseif (mhd .and. relativity) then - ! energy flux = m_${XYZ}$ - mass flux - ! Hard-coded for single-component for now - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*(cm%R(norm_dir) - Ga%R*alpha_rho_R(1)*vel_R(norm_dir)) & - - s_P*(cm%L(norm_dir) - Ga%L*alpha_rho_L(1)*vel_L(norm_dir)) & - + s_M*s_P*(E_L - E_R)) & + end do + elseif (mhd .and. relativity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 3 + ! Flux of m_i in the ${XYZ}$ direction + ! = m_i * v_${XYZ}$ - b_i/Gamma * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot + flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & + (s_M*(cm%R(i)*vel_R(norm_dir) & + - b4%R(i)/Ga%R*B%R(norm_dir) & + + dir_flg(i)*(pres_R + pres_mag%R)) & + - s_P*(cm%L(i)*vel_L(norm_dir) & + - b4%L(i)/Ga%L*B%L(norm_dir) & + + dir_flg(i)*(pres_L + pres_mag%L)) & + + s_M*s_P*(cm%L(i) - cm%R(i))) & /(s_M - s_P) - else if (bubbles_euler) then - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) & - - s_P*vel_L(dir_idx(1))*(E_L + pres_L - ptilde_L) & - + s_M*s_P*(E_L - E_R)) & + end do + elseif (bubbles_euler) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *vel_R(dir_idx(i)) & + + dir_flg(dir_idx(i))*(pres_R - ptilde_R)) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *vel_L(dir_idx(i)) & + + dir_flg(dir_idx(i))*(pres_L - ptilde_L)) & + + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + - rho_R*vel_R(dir_idx(i)))) & /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp - else if (hypoelasticity) then - flux_tau_L = 0._wp; flux_tau_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_tau_L = flux_tau_L + tau_e_L(dir_idx_tau(i))*vel_L(dir_idx(i)) - flux_tau_R = flux_tau_R + tau_e_R(dir_idx_tau(i))*vel_R(dir_idx(i)) - end do - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*(vel_R(dir_idx(1))*(E_R + pres_R) - flux_tau_R) & - - s_P*(vel_L(dir_idx(1))*(E_L + pres_L) - flux_tau_L) & - + s_M*s_P*(E_L - E_R))/(s_M - s_P) - else - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*vel_R(dir_idx(1))*(E_R + pres_R) & - - s_P*vel_L(dir_idx(1))*(E_L + pres_L) & - + s_M*s_P*(E_L - E_R)) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) + end do + else if (hypoelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *vel_R(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_R & + - tau_e_R(dir_idx_tau(i))) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *vel_L(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_L & + - tau_e_L(dir_idx_tau(i))) & + + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + - rho_R*vel_R(dir_idx(i)))) & + /(s_M - s_P) + end do + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *vel_R(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_R) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *vel_L(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_L) & + + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + - rho_R*vel_R(dir_idx(i)))) & /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp - end if - - ! Elastic Stresses - if (hypoelasticity) then - do i = 1, strxe - strxb + 1 !TODO: this indexing may be slow - flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *tau_e_R(i)) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *tau_e_L(i)) & - + s_M*s_P*(rho_L*tau_e_L(i) & - - rho_R*tau_e_R(i))) & - /(s_M - s_P) - end do - end if + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) + end do + end if - ! Advection + ! Energy + if (mhd .and. (.not. relativity)) then + ! energy flux = (E + p + p_mag) * v_${XYZ}$ - B_${XYZ}$ * (v_x*B_x + v_y*B_y + v_z*B_z) + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*(vel_R(norm_dir)*(E_R + pres_R + pres_mag%R) - B%R(norm_dir)*(vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3))) & + - s_P*(vel_L(norm_dir)*(E_L + pres_L + pres_mag%L) - B%L(norm_dir)*(vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3))) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) + elseif (mhd .and. relativity) then + ! energy flux = m_${XYZ}$ - mass flux + ! Hard-coded for single-component for now + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*(cm%R(norm_dir) - Ga%R*alpha_rho_R(1)*vel_R(norm_dir)) & + - s_P*(cm%L(norm_dir) - Ga%L*alpha_rho_L(1)*vel_L(norm_dir)) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) + else if (bubbles_euler) then + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) & + - s_P*vel_L(dir_idx(1))*(E_L + pres_L - ptilde_L) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp + else if (hypoelasticity) then + flux_tau_L = 0._wp; flux_tau_R = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - (qL_prim_rs${XYZ}$_vf(j, k, l, i) & - - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) & - *s_M*s_P/(s_M - s_P) - flux_src_rs${XYZ}$_vf(j, k, l, i) = & - (s_M*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - - s_P*qL_prim_rs${XYZ}$_vf(j, k, l, i)) & + do i = 1, num_dims + flux_tau_L = flux_tau_L + tau_e_L(dir_idx_tau(i))*vel_L(dir_idx(i)) + flux_tau_R = flux_tau_R + tau_e_R(dir_idx_tau(i))*vel_R(dir_idx(i)) + end do + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*(vel_R(dir_idx(1))*(E_R + pres_R) - flux_tau_R) & + - s_P*(vel_L(dir_idx(1))*(E_L + pres_L) - flux_tau_L) & + + s_M*s_P*(E_L - E_R))/(s_M - s_P) + else + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*vel_R(dir_idx(1))*(E_R + pres_R) & + - s_P*vel_L(dir_idx(1))*(E_L + pres_L) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp + end if + + ! Elastic Stresses + if (hypoelasticity) then + do i = 1, strxe - strxb + 1 !TODO: this indexing may be slow + flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *tau_e_R(i)) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *tau_e_L(i)) & + + s_M*s_P*(rho_L*tau_e_L(i) & + - rho_R*tau_e_R(i))) & /(s_M - s_P) end do + end if - if (bubbles_euler) then - ! From HLLC: Kills mass transport @ bubble gas density - if (num_fluids > 1) then - flux_rs${XYZ}$_vf(j, k, l, contxe) = 0._wp - end if + ! Advection + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + (qL_prim_rs${XYZ}$_vf(j, k, l, i) & + - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) & + *s_M*s_P/(s_M - s_P) + flux_src_rs${XYZ}$_vf(j, k, l, i) = & + (s_M*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + - s_P*qL_prim_rs${XYZ}$_vf(j, k, l, i)) & + /(s_M - s_P) + end do + + if (bubbles_euler) then + ! From HLLC: Kills mass transport @ bubble gas density + if (num_fluids > 1) then + flux_rs${XYZ}$_vf(j, k, l, contxe) = 0._wp end if + end if - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) + Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + + flux_rs${XYZ}$_vf(j, k, l, i) = (s_M*Y_R*rho_R*vel_R(dir_idx(1)) & + - s_P*Y_L*rho_L*vel_L(dir_idx(1)) & + + s_M*s_P*(Y_L*rho_L - Y_R*rho_R)) & + /(s_M - s_P) + flux_src_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do + end if - flux_rs${XYZ}$_vf(j, k, l, i) = (s_M*Y_R*rho_R*vel_R(dir_idx(1)) & - - s_P*Y_L*rho_L*vel_L(dir_idx(1)) & - + s_M*s_P*(Y_L*rho_L - Y_R*rho_R)) & - /(s_M - s_P) - flux_src_rs${XYZ}$_vf(j, k, l, i) = 0._wp + if (mhd) then + if (n == 0) then ! 1D: d/dx flux only & Bx = Bx0 = const. + ! B_y flux = v_x * B_y - v_y * Bx0 + ! B_z flux = v_x * B_z - v_z * Bx0 + $:GPU_LOOP(parallelism='[seq]') + do i = 0, 1 + flux_rsx_vf(j, k, l, B_idx%beg + i) = (s_M*(vel_R(1)*B%R(2 + i) - vel_R(2 + i)*Bx0) & + - s_P*(vel_L(1)*B%L(2 + i) - vel_L(2 + i)*Bx0) & + + s_M*s_P*(B%L(2 + i) - B%R(2 + i)))/(s_M - s_P) + end do + else ! 2D/3D: Bx, By, Bz /= const. but zero flux component in the same direction + ! B_x d/d${XYZ}$ flux = (1 - delta(x,${XYZ}$)) * (v_${XYZ}$ * B_x - v_x * B_${XYZ}$) + ! B_y d/d${XYZ}$ flux = (1 - delta(y,${XYZ}$)) * (v_${XYZ}$ * B_y - v_y * B_${XYZ}$) + ! B_z d/d${XYZ}$ flux = (1 - delta(z,${XYZ}$)) * (v_${XYZ}$ * B_z - v_z * B_${XYZ}$) + $:GPU_LOOP(parallelism='[seq]') + do i = 0, 2 + flux_rs${XYZ}$_vf(j, k, l, B_idx%beg + i) = (1 - dir_flg(i + 1))*( & + s_M*(vel_R(dir_idx(1))*B%R(i + 1) - vel_R(i + 1)*B%R(norm_dir)) - & + s_P*(vel_L(dir_idx(1))*B%L(i + 1) - vel_L(i + 1)*B%L(norm_dir)) + & + s_M*s_P*(B%L(i + 1) - B%R(i + 1)))/(s_M - s_P) end do end if + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = 0._wp + end if - if (mhd) then - if (n == 0) then ! 1D: d/dx flux only & Bx = Bx0 = const. - ! B_y flux = v_x * B_y - v_y * Bx0 - ! B_z flux = v_x * B_z - v_z * Bx0 - $:GPU_LOOP(parallelism='[seq]') - do i = 0, 1 - flux_rsx_vf(j, k, l, B_idx%beg + i) = (s_M*(vel_R(1)*B%R(2 + i) - vel_R(2 + i)*Bx0) & - - s_P*(vel_L(1)*B%L(2 + i) - vel_L(2 + i)*Bx0) & - + s_M*s_P*(B%L(2 + i) - B%R(2 + i)))/(s_M - s_P) - end do - else ! 2D/3D: Bx, By, Bz /= const. but zero flux component in the same direction - ! B_x d/d${XYZ}$ flux = (1 - delta(x,${XYZ}$)) * (v_${XYZ}$ * B_x - v_x * B_${XYZ}$) - ! B_y d/d${XYZ}$ flux = (1 - delta(y,${XYZ}$)) * (v_${XYZ}$ * B_y - v_y * B_${XYZ}$) - ! B_z d/d${XYZ}$ flux = (1 - delta(z,${XYZ}$)) * (v_${XYZ}$ * B_z - v_z * B_${XYZ}$) - $:GPU_LOOP(parallelism='[seq]') - do i = 0, 2 - flux_rs${XYZ}$_vf(j, k, l, B_idx%beg + i) = (1 - dir_flg(i + 1))*( & - s_M*(vel_R(dir_idx(1))*B%R(i + 1) - vel_R(i + 1)*B%R(norm_dir)) - & - s_P*(vel_L(dir_idx(1))*B%L(i + 1) - vel_L(i + 1)*B%L(norm_dir)) + & - s_M*s_P*(B%L(i + 1) - B%R(i + 1)))/(s_M - s_P) - end do - end if - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = 0._wp + #:if (NORM_DIR == 2) + if (cyl_coord) then + !Substituting the advective flux into the inviscid geometrical source flux + $:GPU_LOOP(parallelism='[seq]') + do i = 1, E_idx + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + ! Recalculating the radial momentum geometric source flux + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & + flux_rs${XYZ}$_vf(j, k, l, contxe + 2) & + - (s_M*pres_R - s_P*pres_L)/(s_M - s_P) + ! Geometrical source of the void fraction(s) is zero + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do end if - #:if (NORM_DIR == 2) - if (cyl_coord) then - !Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP(parallelism='[seq]') - do i = 1, E_idx - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & - flux_rs${XYZ}$_vf(j, k, l, contxe + 2) & - - (s_M*pres_R - s_P*pres_L)/(s_M - s_P) - ! Geometrical source of the void fraction(s) is zero - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - end if - - if (cyl_coord .and. hypoelasticity) then - ! += tau_sigmasigma using HLL - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) + & - (s_M*tau_e_R(4) - s_P*tau_e_L(4)) & - /(s_M - s_P) - - $:GPU_LOOP(parallelism='[seq]') - do i = strxb, strxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - end if - #:endif - end do + if (cyl_coord .and. hypoelasticity) then + ! += tau_sigmasigma using HLL + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) + & + (s_M*tau_e_R(4) - s_P*tau_e_L(4)) & + /(s_M - s_P) + + $:GPU_LOOP(parallelism='[seq]') + do i = strxb, strxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + end if + #:endif end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if #:endfor if (viscous) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[idx_right_phys, vel_grad_L, vel_grad_R, alpha_L, alpha_R, vel_L, vel_R, Re_L, Re_R]') - do l = isz%beg, isz%end - do k = isy%beg, isy%end - do j = isx%beg, isx%end - idx_right_phys(1) = j - idx_right_phys(2) = k - idx_right_phys(3) = l - idx_right_phys(norm_dir) = idx_right_phys(norm_dir) + 1 - - if (norm_dir == 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rsx_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rsx_vf(j + 1, k, l, E_idx + i) - end do + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,idx_right_phys, vel_grad_L, vel_grad_R, alpha_L, alpha_R, vel_L, vel_R, Re_L, Re_R]') + do l = isz%beg, isz%end + do k = isy%beg, isy%end + do j = isx%beg, isx%end + idx_right_phys(1) = j + idx_right_phys(2) = k + idx_right_phys(3) = l + idx_right_phys(norm_dir) = idx_right_phys(norm_dir) + 1 - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rsx_vf(j, k, l, momxb + i - 1) - vel_R(i) = qR_prim_rsx_vf(j + 1, k, l, momxb + i - 1) - end do - else if (norm_dir == 2) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rsy_vf(k, j, l, E_idx + i) - alpha_R(i) = qR_prim_rsy_vf(k + 1, j, l, E_idx + i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rsy_vf(k, j, l, momxb + i - 1) - vel_R(i) = qR_prim_rsy_vf(k + 1, j, l, momxb + i - 1) - end do - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rsz_vf(l, k, j, E_idx + i) - alpha_R(i) = qR_prim_rsz_vf(l + 1, k, j, E_idx + i) - end do + if (norm_dir == 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rsx_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rsx_vf(j + 1, k, l, E_idx + i) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rsz_vf(l, k, j, momxb + i - 1) - vel_R(i) = qR_prim_rsz_vf(l + 1, k, j, momxb + i - 1) - end do - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = qL_prim_rsx_vf(j, k, l, momxb + i - 1) + vel_R(i) = qR_prim_rsx_vf(j + 1, k, l, momxb + i - 1) + end do + else if (norm_dir == 2) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rsy_vf(k, j, l, E_idx + i) + alpha_R(i) = qR_prim_rsy_vf(k + 1, j, l, E_idx + i) + end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = qL_prim_rsy_vf(k, j, l, momxb + i - 1) + vel_R(i) = qR_prim_rsy_vf(k + 1, j, l, momxb + i - 1) + end do + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rsz_vf(l, k, j, E_idx + i) + alpha_R(i) = qR_prim_rsz_vf(l + 1, k, j, E_idx + i) + end do $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real + do i = 1, num_dims + vel_L(i) = qL_prim_rsz_vf(l, k, j, momxb + i - 1) + vel_R(i) = qR_prim_rsz_vf(l + 1, k, j, momxb + i - 1) + end do + end if - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_L(i) = dflt_real + Re_R(i) = dflt_real - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) & - + Re_L(i) - Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) & - + Re_R(i) - end do + if (Re_size(i) > 0) Re_L(i) = 0._wp + if (Re_size(i) > 0) Re_R(i) = 0._wp - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) & + + Re_L(i) + Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) & + + Re_R(i) end do - if (shear_stress) then - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_grad_L(i, 1) = (dqL_prim_dx_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) - vel_grad_R(i, 1) = (dqR_prim_dx_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) - if (num_dims > 1) then - vel_grad_L(i, 2) = (dqL_prim_dy_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) - vel_grad_R(i, 2) = (dqR_prim_dy_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) - end if - if (num_dims > 2) then - vel_grad_L(i, 3) = (dqL_prim_dz_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) - vel_grad_R(i, 3) = (dqR_prim_dz_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) - end if - end do + Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + end do - if (norm_dir == 1) then - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(1) + vel_grad_R(1, 1)*vel_R(1)) - if (num_dims > 1) then - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(1) + vel_grad_R(2, 2)*vel_R(1)) - - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2) + vel_grad_R(1, 2)) - 0.5_wp*(vel_grad_L(2, 1) + vel_grad_R(2, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2)*vel_L(2) + vel_grad_R(1, 2)*vel_R(2)) - 0.5_wp*(vel_grad_L(2, 1)*vel_L(2) + vel_grad_R(2, 1)*vel_R(2)) - if (num_dims > 2) then - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(1) + vel_grad_R(3, 3)*vel_R(1)) - - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3) + vel_grad_R(1, 3)) - 0.5_wp*(vel_grad_L(3, 1) + vel_grad_R(3, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3)*vel_L(3) + vel_grad_R(1, 3)*vel_R(3)) - 0.5_wp*(vel_grad_L(3, 1)*vel_L(3) + vel_grad_R(3, 1)*vel_R(3)) - end if - end if + if (shear_stress) then - else if (norm_dir == 2) then - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(2) + vel_grad_R(1, 1)*vel_R(2)) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_grad_L(i, 1) = (dqL_prim_dx_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) + vel_grad_R(i, 1) = (dqR_prim_dx_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) + if (num_dims > 1) then + vel_grad_L(i, 2) = (dqL_prim_dy_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) + vel_grad_R(i, 2) = (dqR_prim_dy_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) + end if + if (num_dims > 2) then + vel_grad_L(i, 3) = (dqL_prim_dz_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) + vel_grad_R(i, 3) = (dqR_prim_dz_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) + end if + end do - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(2) + vel_grad_R(2, 2)*vel_R(2)) + if (norm_dir == 1) then + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(1) + vel_grad_R(1, 1)*vel_R(1)) + if (num_dims > 1) then + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(1) + vel_grad_R(2, 2)*vel_R(1)) - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2) + vel_grad_R(1, 2)) - 0.5_wp*(vel_grad_L(2, 1) + vel_grad_R(2, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2)*vel_L(1) + vel_grad_R(1, 2)*vel_R(1)) - 0.5_wp*(vel_grad_L(2, 1)*vel_L(1) + vel_grad_R(2, 1)*vel_R(1)) + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2) + vel_grad_R(1, 2)) - 0.5_wp*(vel_grad_L(2, 1) + vel_grad_R(2, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2)*vel_L(2) + vel_grad_R(1, 2)*vel_R(2)) - 0.5_wp*(vel_grad_L(2, 1)*vel_L(2) + vel_grad_R(2, 1)*vel_R(2)) if (num_dims > 2) then - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(2) + vel_grad_R(3, 3)*vel_R(2)) + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(1) + vel_grad_R(3, 3)*vel_R(1)) - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3) + vel_grad_R(2, 3)) - 0.5_wp*(vel_grad_L(3, 2) + vel_grad_R(3, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3)*vel_L(3) + vel_grad_R(2, 3)*vel_R(3)) - 0.5_wp*(vel_grad_L(3, 2)*vel_L(3) + vel_grad_R(3, 2)*vel_R(3)) + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3) + vel_grad_R(1, 3)) - 0.5_wp*(vel_grad_L(3, 1) + vel_grad_R(3, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3)*vel_L(3) + vel_grad_R(1, 3)*vel_R(3)) - 0.5_wp*(vel_grad_L(3, 1)*vel_L(3) + vel_grad_R(3, 1)*vel_R(3)) end if - else - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(3) + vel_grad_R(1, 1)*vel_R(3)) - - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(3) + vel_grad_R(2, 2)*vel_R(3)) + end if - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3) + vel_grad_R(1, 3)) - 0.5_wp*(vel_grad_L(3, 1) + vel_grad_R(3, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3)*vel_L(1) + vel_grad_R(1, 3)*vel_R(1)) - 0.5_wp*(vel_grad_L(3, 1)*vel_L(1) + vel_grad_R(3, 1)*vel_R(1)) + else if (norm_dir == 2) then + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(2) + vel_grad_R(1, 1)*vel_R(2)) - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(3) + vel_grad_R(3, 3)*vel_R(3)) + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(2) + vel_grad_R(2, 2)*vel_R(2)) - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3) + vel_grad_R(2, 3)) - 0.5_wp*(vel_grad_L(3, 2) + vel_grad_R(3, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3)*vel_L(2) + vel_grad_R(2, 3)*vel_R(2)) - 0.5_wp*(vel_grad_L(3, 2)*vel_L(2) + vel_grad_R(3, 2)*vel_R(2)) + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2) + vel_grad_R(1, 2)) - 0.5_wp*(vel_grad_L(2, 1) + vel_grad_R(2, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2)*vel_L(1) + vel_grad_R(1, 2)*vel_R(1)) - 0.5_wp*(vel_grad_L(2, 1)*vel_L(1) + vel_grad_R(2, 1)*vel_R(1)) + if (num_dims > 2) then + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(2) + vel_grad_R(3, 3)*vel_R(2)) + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3) + vel_grad_R(2, 3)) - 0.5_wp*(vel_grad_L(3, 2) + vel_grad_R(3, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3)*vel_L(3) + vel_grad_R(2, 3)*vel_R(3)) - 0.5_wp*(vel_grad_L(3, 2)*vel_L(3) + vel_grad_R(3, 2)*vel_R(3)) end if - end if + else + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(3) + vel_grad_R(1, 1)*vel_R(3)) - if (bulk_stress) then + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(3) + vel_grad_R(2, 2)*vel_R(3)) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_grad_L(i, 1) = (dqL_prim_dx_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) - vel_grad_R(i, 1) = (dqR_prim_dx_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) - if (num_dims > 1) then - vel_grad_L(i, 2) = (dqL_prim_dy_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) - vel_grad_R(i, 2) = (dqR_prim_dy_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) - end if - if (num_dims > 2) then - vel_grad_L(i, 3) = (dqL_prim_dz_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) - vel_grad_R(i, 3) = (dqR_prim_dz_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) - end if - end do + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3) + vel_grad_R(1, 3)) - 0.5_wp*(vel_grad_L(3, 1) + vel_grad_R(3, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3)*vel_L(1) + vel_grad_R(1, 3)*vel_R(1)) - 0.5_wp*(vel_grad_L(3, 1)*vel_L(1) + vel_grad_R(3, 1)*vel_R(1)) - if (norm_dir == 1) then - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(1) + vel_grad_R(1, 1)*vel_R(1)) - if (num_dims > 1) then - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(1) + vel_grad_R(2, 2)*vel_R(1)) + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(3) + vel_grad_R(3, 3)*vel_R(3)) - if (num_dims > 2) then - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(1) + vel_grad_R(3, 3)*vel_R(1)) - end if - end if + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3) + vel_grad_R(2, 3)) - 0.5_wp*(vel_grad_L(3, 2) + vel_grad_R(3, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3)*vel_L(2) + vel_grad_R(2, 3)*vel_R(2)) - 0.5_wp*(vel_grad_L(3, 2)*vel_L(2) + vel_grad_R(3, 2)*vel_R(2)) + + end if + end if + + if (bulk_stress) then - else if (norm_dir == 2) then - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(2) + vel_grad_R(1, 1)*vel_R(2)) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_grad_L(i, 1) = (dqL_prim_dx_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) + vel_grad_R(i, 1) = (dqR_prim_dx_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) + if (num_dims > 1) then + vel_grad_L(i, 2) = (dqL_prim_dy_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) + vel_grad_R(i, 2) = (dqR_prim_dy_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) + end if + if (num_dims > 2) then + vel_grad_L(i, 3) = (dqL_prim_dz_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) + vel_grad_R(i, 3) = (dqR_prim_dz_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) + end if + end do - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(2) + vel_grad_R(2, 2)*vel_R(2)) + if (norm_dir == 1) then + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(1) + vel_grad_R(1, 1)*vel_R(1)) + if (num_dims > 1) then + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(1) + vel_grad_R(2, 2)*vel_R(1)) if (num_dims > 2) then - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(2) + vel_grad_R(3, 3)*vel_R(2)) + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(1) + vel_grad_R(3, 3)*vel_R(1)) end if - else - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(3) + vel_grad_R(1, 1)*vel_R(3)) + end if - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(3) + vel_grad_R(2, 2)*vel_R(3)) + else if (norm_dir == 2) then + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(2) + vel_grad_R(1, 1)*vel_R(2)) - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(3) + vel_grad_R(3, 3)*vel_R(3)) + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(2) + vel_grad_R(2, 2)*vel_R(2)) + if (num_dims > 2) then + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(2) + vel_grad_R(3, 3)*vel_R(2)) end if + else + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(3) + vel_grad_R(1, 1)*vel_R(3)) + + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(3) + vel_grad_R(2, 2)*vel_R(3)) + + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(3) + vel_grad_R(3, 3)*vel_R(3)) end if - end do + + end if end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if @@ -1991,1557 +1991,1557 @@ contains ! 6-EQUATION MODEL WITH HLLC if (model_eqns == 3) then !ME3 - #:call GPU_PARALLEL_LOOP(collapse=3, private='[vel_L, vel_R, vel_K_Star, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, vel_avg_rms, alpha_L, alpha_R, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, tau_e_L, tau_e_R, G_L, G_R, flux_ene_e, xi_field_L, xi_field_R, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - - idx1 = dir_idx(1) - - vel_L_rms = 0._wp; vel_R_rms = 0._wp - rho_L = 0._wp; rho_R = 0._wp - gamma_L = 0._wp; gamma_R = 0._wp - pi_inf_L = 0._wp; pi_inf_R = 0._wp - qv_L = 0._wp; qv_R = 0._wp - alpha_L_sum = 0._wp; alpha_R_sum = 0._wp + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,vel_L, vel_R, vel_K_Star, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, vel_avg_rms, alpha_L, alpha_R, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, tau_e_L, tau_e_R, G_L, G_R, flux_ene_e, xi_field_L, xi_field_R, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp - end do + idx1 = dir_idx(1) - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + vel_L_rms = 0._wp; vel_R_rms = 0._wp + rho_L = 0._wp; rho_R = 0._wp + gamma_L = 0._wp; gamma_R = 0._wp + pi_inf_L = 0._wp; pi_inf_R = 0._wp + qv_L = 0._wp; qv_R = 0._wp + alpha_L_sum = 0._wp; alpha_R_sum = 0._wp - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) - alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) - end do - end if + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + if (mpp_lim) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) - gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) - - rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) + qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) + alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, advxb + i - 1) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, advxb + i - 1) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) end do + end if + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) + gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) + pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) + qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) + + rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) + pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) + qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) + + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, advxb + i - 1) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, advxb + i - 1) + end do - if (viscous) then + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_L(i) = dflt_real + Re_R(i) = dflt_real + if (Re_size(i) > 0) Re_L(i) = 0._wp + if (Re_size(i) > 0) Re_R(i) = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & - + Re_L(i) - Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & - + Re_R(i) - end do - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + do q = 1, Re_size(i) + Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & + + Re_L(i) + Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & + + Re_R(i) end do - end if + Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + end do + end if - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + qv_R + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + qv_R - ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - end do - G_L = 0._wp; G_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - ! Elastic contribution to energy if G large enough - if ((G_L > verysmall) .and. (G_R > verysmall)) then + ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY + if (hypoelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, strxe - strxb + 1 + tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + end do + G_L = 0._wp; G_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) + end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, strxe - strxb + 1 + ! Elastic contribution to energy if G large enough + if ((G_L > verysmall) .and. (G_R > verysmall)) then + E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) + E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) + ! Additional terms in 2D and 3D + if ((i == 2) .or. (i == 4) .or. (i == 5)) then E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - ! Additional terms in 2D and 3D - if ((i == 2) .or. (i == 4) .or. (i == 5)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - end if end if - end do + end if + end do + end if + + ! ENERGY ADJUSTMENTS FOR HYPERELASTIC ENERGY + if (hyperelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) + xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) + end do + G_L = 0._wp; G_R = 0._wp; + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + ! Mixture left and right shear modulus + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) + end do + ! Elastic contribution to energy if G large enough + if (G_L > verysmall .and. G_R > verysmall) then + E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) + E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, b_size - 1 + tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + end do + end if + + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + + @:compute_average_state() + + call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & + vel_L_rms, 0._wp, c_L) + + call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & + vel_R_rms, 0._wp, c_R) + + !> The computation of c_avg does not require all the variables, and therefore the non '_avg' + ! variables are placeholders to call the subroutine. + call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & + vel_avg_rms, 0._wp, c_avg) + + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) + end do + end if + + ! Low Mach correction + if (low_Mach == 2) then + @:compute_low_Mach_correction() + end if + + ! COMPUTING THE DIRECT WAVE SPEEDS + if (wave_speeds == 1) then + if (elasticity) then + s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + & + (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L), vel_R(dir_idx(1)) - sqrt(c_R*c_R + & + (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R)) + s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + & + (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R), vel_L(dir_idx(1)) + sqrt(c_L*c_L + & + (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L)) + s_S = (pres_R - tau_e_R(dir_idx_tau(1)) - pres_L + & + tau_e_L(dir_idx_tau(1)) + rho_L*vel_L(idx1)*(s_L - vel_L(idx1)) - & + rho_R*vel_R(idx1)*(s_R - vel_R(idx1)))/(rho_L*(s_L - vel_L(idx1)) - & + rho_R*(s_R - vel_R(idx1))) + else + s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) + s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) + s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & + (s_L - vel_L(dir_idx(1))) - rho_R*vel_R(dir_idx(1))*(s_R - vel_R(dir_idx(1)))) & + /(rho_L*(s_L - vel_L(dir_idx(1))) - rho_R*(s_R - vel_R(dir_idx(1)))) - ! ENERGY ADJUSTMENTS FOR HYPERELASTIC ENERGY - if (hyperelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) - xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) - end do - G_L = 0._wp; G_R = 0._wp; - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - ! Mixture left and right shear modulus - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) - end do - ! Elastic contribution to energy if G large enough - if (G_L > verysmall .and. G_R > verysmall) then - E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) - E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) - end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, b_size - 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - end do end if + elseif (wave_speeds == 2) then + pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & + (vel_L(dir_idx(1)) - & + vel_R(dir_idx(1)))) - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R + pres_SR = pres_SL - @:compute_average_state() + Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & + (pres_SL/pres_L - 1._wp)*pres_L/ & + ((pres_L + pi_inf_L/(1._wp + gamma_L))))) + Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & + (pres_SR/pres_R - 1._wp)*pres_R/ & + ((pres_R + pi_inf_R/(1._wp + gamma_R))))) - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & - vel_L_rms, 0._wp, c_L) + s_L = vel_L(dir_idx(1)) - c_L*Ms_L + s_R = vel_R(dir_idx(1)) + c_R*Ms_R - call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & - vel_R_rms, 0._wp, c_R) + s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & + (pres_L - pres_R)/ & + (rho_avg*c_avg)) + end if - !> The computation of c_avg does not require all the variables, and therefore the non '_avg' - ! variables are placeholders to call the subroutine. - call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & - vel_avg_rms, 0._wp, c_avg) + ! follows Einfeldt et al. + ! s_M/P = min/max(0.,s_L/R) + s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) - end do - end if + ! goes with q_star_L/R = xi_L/R * (variable) + ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) + xi_L = (s_L - vel_L(idx1))/(s_L - s_S) + xi_R = (s_R - vel_R(idx1))/(s_R - s_S) - ! Low Mach correction - if (low_Mach == 2) then - @:compute_low_Mach_correction() - end if + ! goes with numerical star velocity in x/y/z directions + ! xi_P/M = 0.5 +/m sgn(0.5,s_star) + xi_M = (5.e-1_wp + sign(0.5_wp, s_S)) + xi_P = (5.e-1_wp - sign(0.5_wp, s_S)) - ! COMPUTING THE DIRECT WAVE SPEEDS - if (wave_speeds == 1) then - if (elasticity) then - s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + & - (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L), vel_R(dir_idx(1)) - sqrt(c_R*c_R + & - (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R)) - s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + & - (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R), vel_L(dir_idx(1)) + sqrt(c_L*c_L + & - (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L)) - s_S = (pres_R - tau_e_R(dir_idx_tau(1)) - pres_L + & - tau_e_L(dir_idx_tau(1)) + rho_L*vel_L(idx1)*(s_L - vel_L(idx1)) - & - rho_R*vel_R(idx1)*(s_R - vel_R(idx1)))/(rho_L*(s_L - vel_L(idx1)) - & - rho_R*(s_R - vel_R(idx1))) - else - s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) - s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) - s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & - (s_L - vel_L(dir_idx(1))) - rho_R*vel_R(dir_idx(1))*(s_R - vel_R(dir_idx(1)))) & - /(rho_L*(s_L - vel_L(dir_idx(1))) - rho_R*(s_R - vel_R(dir_idx(1)))) + ! goes with the numerical velocity in x/y/z directions + ! xi_P/M (pressure) = min/max(0. sgn(1,sL/sR)) + xi_MP = -min(0._wp, sign(1._wp, s_L)) + xi_PP = max(0._wp, sign(1._wp, s_R)) - end if - elseif (wave_speeds == 2) then - pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & - (vel_L(dir_idx(1)) - & - vel_R(dir_idx(1)))) - - pres_SR = pres_SL - - Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & - (pres_SL/pres_L - 1._wp)*pres_L/ & - ((pres_L + pi_inf_L/(1._wp + gamma_L))))) - Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & - (pres_SR/pres_R - 1._wp)*pres_R/ & - ((pres_R + pi_inf_R/(1._wp + gamma_R))))) - - s_L = vel_L(dir_idx(1)) - c_L*Ms_L - s_R = vel_R(dir_idx(1)) + c_R*Ms_R - - s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & - (pres_L - pres_R)/ & - (rho_avg*c_avg)) - end if + E_star = xi_M*(E_L + xi_MP*(xi_L*(E_L + (s_S - vel_L(dir_idx(1)))* & + (rho_L*s_S + pres_L/(s_L - vel_L(dir_idx(1))))) - E_L)) + & + xi_P*(E_R + xi_PP*(xi_R*(E_R + (s_S - vel_R(dir_idx(1)))* & + (rho_R*s_S + pres_R/(s_R - vel_R(dir_idx(1))))) - E_R)) + p_Star = xi_M*(pres_L + xi_MP*(rho_L*(s_L - vel_L(dir_idx(1)))*(s_S - vel_L(dir_idx(1))))) + & + xi_P*(pres_R + xi_PP*(rho_R*(s_R - vel_R(dir_idx(1)))*(s_S - vel_R(dir_idx(1))))) - ! follows Einfeldt et al. - ! s_M/P = min/max(0.,s_L/R) - s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) - - ! goes with q_star_L/R = xi_L/R * (variable) - ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) - xi_L = (s_L - vel_L(idx1))/(s_L - s_S) - xi_R = (s_R - vel_R(idx1))/(s_R - s_S) - - ! goes with numerical star velocity in x/y/z directions - ! xi_P/M = 0.5 +/m sgn(0.5,s_star) - xi_M = (5.e-1_wp + sign(0.5_wp, s_S)) - xi_P = (5.e-1_wp - sign(0.5_wp, s_S)) - - ! goes with the numerical velocity in x/y/z directions - ! xi_P/M (pressure) = min/max(0. sgn(1,sL/sR)) - xi_MP = -min(0._wp, sign(1._wp, s_L)) - xi_PP = max(0._wp, sign(1._wp, s_R)) - - E_star = xi_M*(E_L + xi_MP*(xi_L*(E_L + (s_S - vel_L(dir_idx(1)))* & - (rho_L*s_S + pres_L/(s_L - vel_L(dir_idx(1))))) - E_L)) + & - xi_P*(E_R + xi_PP*(xi_R*(E_R + (s_S - vel_R(dir_idx(1)))* & - (rho_R*s_S + pres_R/(s_R - vel_R(dir_idx(1))))) - E_R)) - p_Star = xi_M*(pres_L + xi_MP*(rho_L*(s_L - vel_L(dir_idx(1)))*(s_S - vel_L(dir_idx(1))))) + & - xi_P*(pres_R + xi_PP*(rho_R*(s_R - vel_R(dir_idx(1)))*(s_S - vel_R(dir_idx(1))))) - - rho_Star = xi_M*(rho_L*(xi_MP*xi_L + 1._wp - xi_MP)) + & - xi_P*(rho_R*(xi_PP*xi_R + 1._wp - xi_PP)) - - vel_K_Star = vel_L(idx1)*(1._wp - xi_MP) + xi_MP*vel_R(idx1) + & - xi_MP*xi_PP*(s_S - vel_R(idx1)) - - ! Low Mach correction - if (low_Mach == 1) then - @:compute_low_Mach_correction() - else - pcorr = 0._wp - end if + rho_Star = xi_M*(rho_L*(xi_MP*xi_L + 1._wp - xi_MP)) + & + xi_P*(rho_R*(xi_PP*xi_R + 1._wp - xi_PP)) - ! COMPUTING FLUXES - ! MASS FLUX. - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i)*(vel_L(idx1) + s_M*(xi_L - 1._wp)) + & - xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*(vel_R(idx1) + s_P*(xi_R - 1._wp)) - end do + vel_K_Star = vel_L(idx1)*(1._wp - xi_MP) + xi_MP*vel_R(idx1) + & + xi_MP*xi_PP*(s_S - vel_R(idx1)) + + ! Low Mach correction + if (low_Mach == 1) then + @:compute_low_Mach_correction() + else + pcorr = 0._wp + end if + + ! COMPUTING FLUXES + ! MASS FLUX. + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i)*(vel_L(idx1) + s_M*(xi_L - 1._wp)) + & + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*(vel_R(idx1) + s_P*(xi_R - 1._wp)) + end do + + ! MOMENTUM FLUX. + ! f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + idxi = dir_idx(i) + flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = rho_Star*vel_K_Star* & + (dir_flg(idxi)*vel_K_Star + (1._wp - dir_flg(idxi))*(xi_M*vel_L(idxi) + xi_P*vel_R(idxi))) + dir_flg(idxi)*p_Star & + + (s_M/s_L)*(s_P/s_R)*dir_flg(idxi)*pcorr + end do - ! MOMENTUM FLUX. - ! f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w) + ! ENERGY FLUX. + ! f = u*(E-\sigma), q = E, q_star = \xi*E+(s-u)(\rho s_star - \sigma/(s-u)) + flux_rs${XYZ}$_vf(j, k, l, E_idx) = (E_star + p_Star)*vel_K_Star & + + (s_M/s_L)*(s_P/s_R)*pcorr*s_S + + ! ELASTICITY. Elastic shear stress additions for the momentum and energy flux + if (elasticity) then + flux_ene_e = 0._wp; $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims idxi = dir_idx(i) - flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = rho_Star*vel_K_Star* & - (dir_flg(idxi)*vel_K_Star + (1._wp - dir_flg(idxi))*(xi_M*vel_L(idxi) + xi_P*vel_R(idxi))) + dir_flg(idxi)*p_Star & - + (s_M/s_L)*(s_P/s_R)*dir_flg(idxi)*pcorr + ! MOMENTUM ELASTIC FLUX. + flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = & + flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) & + - xi_M*tau_e_L(dir_idx_tau(i)) - xi_P*tau_e_R(dir_idx_tau(i)) + ! ENERGY ELASTIC FLUX. + flux_ene_e = flux_ene_e - & + xi_M*(vel_L(idxi)*tau_e_L(dir_idx_tau(i)) + & + s_M*(xi_L*((s_S - vel_L(i))*(tau_e_L(dir_idx_tau(i))/(s_L - vel_L(i)))))) - & + xi_P*(vel_R(idxi)*tau_e_R(dir_idx_tau(i)) + & + s_P*(xi_R*((s_S - vel_R(i))*(tau_e_R(dir_idx_tau(i))/(s_R - vel_R(i)))))) end do + flux_rs${XYZ}$_vf(j, k, l, E_idx) = flux_rs${XYZ}$_vf(j, k, l, E_idx) + flux_ene_e + end if - ! ENERGY FLUX. - ! f = u*(E-\sigma), q = E, q_star = \xi*E+(s-u)(\rho s_star - \sigma/(s-u)) - flux_rs${XYZ}$_vf(j, k, l, E_idx) = (E_star + p_Star)*vel_K_Star & - + (s_M/s_L)*(s_P/s_R)*pcorr*s_S + ! VOLUME FRACTION FLUX. + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i)*s_S + & + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*s_S + end do - ! ELASTICITY. Elastic shear stress additions for the momentum and energy flux - if (elasticity) then - flux_ene_e = 0._wp; - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - idxi = dir_idx(i) - ! MOMENTUM ELASTIC FLUX. - flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = & - flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) & - - xi_M*tau_e_L(dir_idx_tau(i)) - xi_P*tau_e_R(dir_idx_tau(i)) - ! ENERGY ELASTIC FLUX. - flux_ene_e = flux_ene_e - & - xi_M*(vel_L(idxi)*tau_e_L(dir_idx_tau(i)) + & - s_M*(xi_L*((s_S - vel_L(i))*(tau_e_L(dir_idx_tau(i))/(s_L - vel_L(i)))))) - & - xi_P*(vel_R(idxi)*tau_e_R(dir_idx_tau(i)) + & - s_P*(xi_R*((s_S - vel_R(i))*(tau_e_R(dir_idx_tau(i))/(s_R - vel_R(i)))))) - end do - flux_rs${XYZ}$_vf(j, k, l, E_idx) = flux_rs${XYZ}$_vf(j, k, l, E_idx) + flux_ene_e - end if + ! SOURCE TERM FOR VOLUME FRACTION ADVECTION FLUX. + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + idxi = dir_idx(i) + vel_src_rs${XYZ}$_vf(j, k, l, idxi) = & + xi_M*(vel_L(idxi) + dir_flg(idxi)*(s_S*(xi_MP*(xi_L - 1) + 1) - vel_L(idxi))) + & + xi_P*(vel_R(idxi) + dir_flg(idxi)*(s_S*(xi_PP*(xi_R - 1) + 1) - vel_R(idxi))) + end do - ! VOLUME FRACTION FLUX. - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i)*s_S + & - xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*s_S - end do + ! INTERNAL ENERGIES ADVECTION FLUX. + ! K-th pressure and velocity in preparation for the internal energy flux + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + p_K_Star = xi_M*(xi_MP*((pres_L + pi_infs(i)/(1._wp + gammas(i)))* & + xi_L**(1._wp/gammas(i) + 1._wp) - pi_infs(i)/(1._wp + gammas(i)) - pres_L) + pres_L) + & + xi_P*(xi_PP*((pres_R + pi_infs(i)/(1._wp + gammas(i)))* & + xi_R**(1._wp/gammas(i) + 1._wp) - pi_infs(i)/(1._wp + gammas(i)) - pres_R) + pres_R) + + flux_rs${XYZ}$_vf(j, k, l, i + intxb - 1) = & + ((xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i + advxb - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i + advxb - 1))* & + (gammas(i)*p_K_Star + pi_infs(i)) + & + (xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i + contxb - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i + contxb - 1))* & + qvs(i))*vel_K_Star & + + (s_M/s_L)*(s_P/s_R)*pcorr*s_S*(xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i + advxb - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i + advxb - 1)) + end do + + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, idx1) - ! SOURCE TERM FOR VOLUME FRACTION ADVECTION FLUX. + ! HYPOELASTIC STRESS EVOLUTION FLUX. + if (hypoelasticity) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - idxi = dir_idx(i) - vel_src_rs${XYZ}$_vf(j, k, l, idxi) = & - xi_M*(vel_L(idxi) + dir_flg(idxi)*(s_S*(xi_MP*(xi_L - 1) + 1) - vel_L(idxi))) + & - xi_P*(vel_R(idxi) + dir_flg(idxi)*(s_S*(xi_PP*(xi_R - 1) + 1) - vel_R(idxi))) + do i = 1, strxe - strxb + 1 + flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & + xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(idx1)*tau_e_L(i)) + & + xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*tau_e_R(i) - rho_R*vel_R(idx1)*tau_e_R(i)) end do + end if - ! INTERNAL ENERGIES ADVECTION FLUX. - ! K-th pressure and velocity in preparation for the internal energy flux + ! REFERENCE MAP FLUX. + if (hyperelasticity) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - p_K_Star = xi_M*(xi_MP*((pres_L + pi_infs(i)/(1._wp + gammas(i)))* & - xi_L**(1._wp/gammas(i) + 1._wp) - pi_infs(i)/(1._wp + gammas(i)) - pres_L) + pres_L) + & - xi_P*(xi_PP*((pres_R + pi_infs(i)/(1._wp + gammas(i)))* & - xi_R**(1._wp/gammas(i) + 1._wp) - pi_infs(i)/(1._wp + gammas(i)) - pres_R) + pres_R) - - flux_rs${XYZ}$_vf(j, k, l, i + intxb - 1) = & - ((xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i + advxb - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i + advxb - 1))* & - (gammas(i)*p_K_Star + pi_infs(i)) + & - (xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i + contxb - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i + contxb - 1))* & - qvs(i))*vel_K_Star & - + (s_M/s_L)*(s_P/s_R)*pcorr*s_S*(xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i + advxb - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i + advxb - 1)) + do i = 1, num_dims + flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = & + xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) & + - rho_L*vel_L(idx1)*xi_field_L(i)) + & + xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*xi_field_R(i) & + - rho_R*vel_R(idx1)*xi_field_R(i)) end do + end if - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, idx1) + ! COLOR FUNCTION FLUX + if (surface_tension) then + flux_rs${XYZ}$_vf(j, k, l, c_idx) = & + (xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, c_idx) + & + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, c_idx))*s_S + end if - ! HYPOELASTIC STRESS EVOLUTION FLUX. - if (hypoelasticity) then + ! Geometrical source flux for cylindrical coordinates + #:if (NORM_DIR == 2) + if (cyl_coord) then + !Substituting the advective flux into the inviscid geometrical source flux $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & - xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(idx1)*tau_e_L(i)) + & - xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*tau_e_R(i) - rho_R*vel_R(idx1)*tau_e_R(i)) + do i = 1, E_idx + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do - end if - - ! REFERENCE MAP FLUX. - if (hyperelasticity) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = & - xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) & - - rho_L*vel_L(idx1)*xi_field_L(i)) + & - xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*xi_field_R(i) & - - rho_R*vel_R(idx1)*xi_field_R(i)) + do i = intxb, intxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + ! Recalculating the radial momentum geometric source flux + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) = & + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) - p_Star + ! Geometrical source of the void fraction(s) is zero + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do end if + #:endif + #:if (NORM_DIR == 3) + if (grid_geometry == 3) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, sys_size + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) = & + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) - p_Star - ! COLOR FUNCTION FLUX - if (surface_tension) then - flux_rs${XYZ}$_vf(j, k, l, c_idx) = & - (xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, c_idx) + & - xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, c_idx))*s_S + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) end if + #:endif - ! Geometrical source flux for cylindrical coordinates - #:if (NORM_DIR == 2) - if (cyl_coord) then - !Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP(parallelism='[seq]') - do i = 1, E_idx - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = intxb, intxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) = & - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) - p_Star - ! Geometrical source of the void fraction(s) is zero - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - end if - #:endif - #:if (NORM_DIR == 3) - if (grid_geometry == 3) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, sys_size - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) = & - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) - p_Star - - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) - end if - #:endif - - end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() elseif (model_eqns == 4) then !ME4 - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, vel_avg_rms, nbub_L, nbub_R, ptilde_L, ptilde_R]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, vel_avg_rms, nbub_L, nbub_R, ptilde_L, ptilde_R]') + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end - vel_L_rms = 0._wp; vel_R_rms = 0._wp - rho_L = 0._wp; rho_R = 0._wp - gamma_L = 0._wp; gamma_R = 0._wp - pi_inf_L = 0._wp; pi_inf_R = 0._wp - qv_L = 0._wp; qv_R = 0._wp + vel_L_rms = 0._wp; vel_R_rms = 0._wp + rho_L = 0._wp; rho_R = 0._wp + gamma_L = 0._wp; gamma_R = 0._wp + pi_inf_L = 0._wp; pi_inf_R = 0._wp + qv_L = 0._wp; qv_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) - gamma_L = gamma_L + alpha_L(i)*gammas(i) - pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) - qv_L = qv_L + alpha_rho_L(i)*qvs(i) - - rho_R = rho_R + alpha_rho_R(i) - gamma_R = gamma_R + alpha_R(i)*gammas(i) - pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) - qv_R = qv_R + alpha_rho_R(i)*qvs(i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_L = rho_L + alpha_rho_L(i) + gamma_L = gamma_L + alpha_L(i)*gammas(i) + pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) + qv_L = qv_L + alpha_rho_L(i)*qvs(i) + + rho_R = rho_R + alpha_rho_R(i) + gamma_R = gamma_R + alpha_R(i)*gammas(i) + pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) + qv_R = qv_R + alpha_rho_R(i)*qvs(i) + end do + + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + qv_R - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + qv_R + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R + @:compute_average_state() - @:compute_average_state() + call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & + vel_L_rms, 0._wp, c_L) - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & - vel_L_rms, 0._wp, c_L) + call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & + vel_R_rms, 0._wp, c_R) - call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & - vel_R_rms, 0._wp, c_R) + !> The computation of c_avg does not require all the variables, and therefore the non '_avg' + ! variables are placeholders to call the subroutine. - !> The computation of c_avg does not require all the variables, and therefore the non '_avg' - ! variables are placeholders to call the subroutine. + call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & + vel_avg_rms, 0._wp, c_avg) - call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & - vel_avg_rms, 0._wp, c_avg) + if (wave_speeds == 1) then + s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) + s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) - if (wave_speeds == 1) then - s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) - s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) + s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & + (s_L - vel_L(dir_idx(1))) - & + rho_R*vel_R(dir_idx(1))* & + (s_R - vel_R(dir_idx(1)))) & + /(rho_L*(s_L - vel_L(dir_idx(1))) - & + rho_R*(s_R - vel_R(dir_idx(1)))) + elseif (wave_speeds == 2) then + pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & + (vel_L(dir_idx(1)) - & + vel_R(dir_idx(1)))) - s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & - (s_L - vel_L(dir_idx(1))) - & - rho_R*vel_R(dir_idx(1))* & - (s_R - vel_R(dir_idx(1)))) & - /(rho_L*(s_L - vel_L(dir_idx(1))) - & - rho_R*(s_R - vel_R(dir_idx(1)))) - elseif (wave_speeds == 2) then - pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & - (vel_L(dir_idx(1)) - & - vel_R(dir_idx(1)))) - - pres_SR = pres_SL - - Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & - (pres_SL/pres_L - 1._wp)*pres_L/ & - ((pres_L + pi_inf_L/(1._wp + gamma_L))))) - Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & - (pres_SR/pres_R - 1._wp)*pres_R/ & - ((pres_R + pi_inf_R/(1._wp + gamma_R))))) - - s_L = vel_L(dir_idx(1)) - c_L*Ms_L - s_R = vel_R(dir_idx(1)) + c_R*Ms_R - - s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & - (pres_L - pres_R)/ & - (rho_avg*c_avg)) - end if + pres_SR = pres_SL + + Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & + (pres_SL/pres_L - 1._wp)*pres_L/ & + ((pres_L + pi_inf_L/(1._wp + gamma_L))))) + Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & + (pres_SR/pres_R - 1._wp)*pres_R/ & + ((pres_R + pi_inf_R/(1._wp + gamma_R))))) + + s_L = vel_L(dir_idx(1)) - c_L*Ms_L + s_R = vel_R(dir_idx(1)) + c_R*Ms_R - ! follows Einfeldt et al. - ! s_M/P = min/max(0.,s_L/R) - s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) + s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & + (pres_L - pres_R)/ & + (rho_avg*c_avg)) + end if - ! goes with q_star_L/R = xi_L/R * (variable) - ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) - xi_L = (s_L - vel_L(dir_idx(1)))/(s_L - s_S) - xi_R = (s_R - vel_R(dir_idx(1)))/(s_R - s_S) + ! follows Einfeldt et al. + ! s_M/P = min/max(0.,s_L/R) + s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) - ! goes with numerical velocity in x/y/z directions - ! xi_P/M = 0.5 +/m sgn(0.5,s_star) - xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S)) - xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S)) + ! goes with q_star_L/R = xi_L/R * (variable) + ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) + xi_L = (s_L - vel_L(dir_idx(1)))/(s_L - s_S) + xi_R = (s_R - vel_R(dir_idx(1)))/(s_R - s_S) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*alpha_rho_L(i) & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*alpha_rho_R(i) & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) - end do + ! goes with numerical velocity in x/y/z directions + ! xi_P/M = 0.5 +/m sgn(0.5,s_star) + xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S)) + xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S)) - ! Momentum flux. - ! f = \rho u u + p I, q = \rho u, q_star = \xi * \rho*(s_star, v, w) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*alpha_rho_L(i) & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*alpha_rho_R(i) & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end do + + ! Momentum flux. + ! f = \rho u u + p I, q = \rho u, q_star = \xi * \rho*(s_star, v, w) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + xi_M*(rho_L*(vel_L(dir_idx(1))* & + vel_L(dir_idx(i)) + & + s_M*(xi_L*(dir_flg(dir_idx(i))*s_S + & + (1._wp - dir_flg(dir_idx(i)))* & + vel_L(dir_idx(i))) - vel_L(dir_idx(i)))) + & + dir_flg(dir_idx(i))*pres_L) & + + xi_P*(rho_R*(vel_R(dir_idx(1))* & + vel_R(dir_idx(i)) + & + s_P*(xi_R*(dir_flg(dir_idx(i))*s_S + & + (1._wp - dir_flg(dir_idx(i)))* & + vel_R(dir_idx(i))) - vel_R(dir_idx(i)))) + & + dir_flg(dir_idx(i))*pres_R) + end do + + if (bubbles_euler) then + ! Put p_tilde in $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - xi_M*(rho_L*(vel_L(dir_idx(1))* & - vel_L(dir_idx(i)) + & - s_M*(xi_L*(dir_flg(dir_idx(i))*s_S + & - (1._wp - dir_flg(dir_idx(i)))* & - vel_L(dir_idx(i))) - vel_L(dir_idx(i)))) + & - dir_flg(dir_idx(i))*pres_L) & - + xi_P*(rho_R*(vel_R(dir_idx(1))* & - vel_R(dir_idx(i)) + & - s_P*(xi_R*(dir_flg(dir_idx(i))*s_S + & - (1._wp - dir_flg(dir_idx(i)))* & - vel_R(dir_idx(i))) - vel_R(dir_idx(i)))) + & - dir_flg(dir_idx(i))*pres_R) + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) + & + xi_M*(dir_flg(dir_idx(i))*(-1._wp*ptilde_L)) & + + xi_P*(dir_flg(dir_idx(i))*(-1._wp*ptilde_R)) end do + end if - if (bubbles_euler) then - ! Put p_tilde in - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) + & - xi_M*(dir_flg(dir_idx(i))*(-1._wp*ptilde_L)) & - + xi_P*(dir_flg(dir_idx(i))*(-1._wp*ptilde_R)) - end do - end if + flux_rs${XYZ}$_vf(j, k, l, E_idx) = 0._wp - flux_rs${XYZ}$_vf(j, k, l, E_idx) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = alf_idx, alf_idx !only advect the void fraction + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end do + + ! Source for volume fraction advection equation + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + + vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = 0._wp + !IF ( (model_eqns == 4) .or. (num_fluids==1) ) vel_src_rs_vf(dir_idx(i))%sf(j,k,l) = 0._wp + end do + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(1)) + + ! Add advection flux for bubble variables + if (bubbles_euler) then $:GPU_LOOP(parallelism='[seq]') - do i = alf_idx, alf_idx !only advect the void fraction + do i = bubxb, bubxe flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, i) & *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + + xi_P*nbub_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) end do + end if - ! Source for volume fraction advection equation - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - - vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = 0._wp - !IF ( (model_eqns == 4) .or. (num_fluids==1) ) vel_src_rs_vf(dir_idx(i))%sf(j,k,l) = 0._wp - end do - - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(1)) + ! Geometrical source flux for cylindrical coordinates - ! Add advection flux for bubble variables - if (bubbles_euler) then + #:if (NORM_DIR == 2) + if (cyl_coord) then + ! Substituting the advective flux into the inviscid geometrical source flux + $:GPU_LOOP(parallelism='[seq]') + do i = 1, E_idx + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + ! Recalculating the radial momentum geometric source flux + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(1)) = & + xi_M*(rho_L*(vel_L(dir_idx(1))* & + vel_L(dir_idx(1)) + & + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & + + xi_P*(rho_R*(vel_R(dir_idx(1))* & + vel_R(dir_idx(1)) + & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + ! Geometrical source of the void fraction(s) is zero + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do + end if + #:endif + #:if (NORM_DIR == 3) + if (grid_geometry == 3) then $:GPU_LOOP(parallelism='[seq]') - do i = bubxb, bubxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*nbub_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + do i = 1, sys_size + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb + 1) = & + -xi_M*(rho_L*(vel_L(dir_idx(1))* & + vel_L(dir_idx(1)) + & + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & + - xi_P*(rho_R*(vel_R(dir_idx(1))* & + vel_R(dir_idx(1)) + & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) end if - - ! Geometrical source flux for cylindrical coordinates - - #:if (NORM_DIR == 2) - if (cyl_coord) then - ! Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP(parallelism='[seq]') - do i = 1, E_idx - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(1)) = & - xi_M*(rho_L*(vel_L(dir_idx(1))* & - vel_L(dir_idx(1)) + & - s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - + xi_P*(rho_R*(vel_R(dir_idx(1))* & - vel_R(dir_idx(1)) + & - s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) - ! Geometrical source of the void fraction(s) is zero - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - end if - #:endif - #:if (NORM_DIR == 3) - if (grid_geometry == 3) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, sys_size - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb + 1) = & - -xi_M*(rho_L*(vel_L(dir_idx(1))* & - vel_L(dir_idx(1)) + & - s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - - xi_P*(rho_R*(vel_R(dir_idx(1))* & - vel_R(dir_idx(1)) + & - s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) - end if - #:endif - end do + #:endif end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() elseif (model_eqns == 2 .and. bubbles_euler) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[R0_L, R0_R, V0_L, V0_R, P0_L, P0_R, pbw_L, pbw_R, vel_L, vel_R, rho_avg, alpha_L, alpha_R, h_avg, gamma_avg, s_L, s_R, s_S, nbub_L, nbub_R, ptilde_L, ptilde_R, vel_avg_rms, Re_L, Re_R, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,R0_L, R0_R, V0_L, V0_R, P0_L, P0_R, pbw_L, pbw_R, vel_L, vel_R, rho_avg, alpha_L, alpha_R, h_avg, gamma_avg, s_L, s_R, s_S, nbub_L, nbub_R, ptilde_L, ptilde_R, vel_avg_rms, Re_L, Re_R, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + + vel_L_rms = 0._wp; vel_R_rms = 0._wp + rho_L = 0._wp; rho_R = 0._wp + gamma_L = 0._wp; gamma_R = 0._wp + pi_inf_L = 0._wp; pi_inf_R = 0._wp + qv_L = 0._wp; qv_R = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do - vel_L_rms = 0._wp; vel_R_rms = 0._wp - rho_L = 0._wp; rho_R = 0._wp - gamma_L = 0._wp; gamma_R = 0._wp - pi_inf_L = 0._wp; pi_inf_R = 0._wp - qv_L = 0._wp; qv_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp + end do + ! Retain this in the refactor + if (mpp_lim .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) + gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) + pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) + qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) + rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) + pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) + qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) end do - + else if (num_fluids > 2) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp + do i = 1, num_fluids - 1 + rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) + gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) + pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) + qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) + rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) + pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) + qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) end do + else + rho_L = qL_prim_rs${XYZ}$_vf(j, k, l, 1) + gamma_L = gammas(1) + pi_inf_L = pi_infs(1) + qv_L = qvs(1) + rho_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, 1) + gamma_R = gammas(1) + pi_inf_R = pi_infs(1) + qv_R = qvs(1) + end if - ! Retain this in the refactor - if (mpp_lim .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) - gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) - rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) - end do - else if (num_fluids > 2) then + if (viscous) then + if (num_fluids == 1) then ! Need to consider case with num_fluids >= 2 $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) - gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) - rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) + do i = 1, 2 + Re_L(i) = dflt_real + Re_R(i) = dflt_real + + if (Re_size(i) > 0) Re_L(i) = 0._wp + if (Re_size(i) > 0) Re_R(i) = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_L(i) = (1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q)))/Res_gs(i, q) & + + Re_L(i) + Re_R(i) = (1._wp - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q)))/Res_gs(i, q) & + + Re_R(i) + end do + + Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + end do - else - rho_L = qL_prim_rs${XYZ}$_vf(j, k, l, 1) - gamma_L = gammas(1) - pi_inf_L = pi_infs(1) - qv_L = qvs(1) - rho_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, 1) - gamma_R = gammas(1) - pi_inf_R = pi_infs(1) - qv_R = qvs(1) end if + end if - if (viscous) then - if (num_fluids == 1) then ! Need to consider case with num_fluids >= 2 - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = (1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q)))/Res_gs(i, q) & - + Re_L(i) - Re_R(i) = (1._wp - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q)))/Res_gs(i, q) & - + Re_R(i) - end do + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + if (avg_state == 2) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nb + R0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, rs(i)) + R0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, rs(i)) + + V0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, vs(i)) + V0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, vs(i)) + if (.not. polytropic .and. .not. qbmm) then + P0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, ps(i)) + P0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, ps(i)) + end if + end do + if (.not. qbmm) then + if (adv_n) then + nbub_L = qL_prim_rs${XYZ}$_vf(j, k, l, n_idx) + nbub_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, n_idx) + else + nbub_L_denom = 0._wp + nbub_R_denom = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nb + nbub_L_denom = nbub_L_denom + (R0_L(i)**3._wp)*weight(i) + nbub_R_denom = nbub_R_denom + (R0_R(i)**3._wp)*weight(i) end do + nbub_L = (3._wp/(4._wp*pi))*qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)/nbub_L_denom + nbub_R = (3._wp/(4._wp*pi))*qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)/nbub_R_denom end if + else + !nb stored in 0th moment of first R0 bin in variable conversion module + nbub_L = qL_prim_rs${XYZ}$_vf(j, k, l, bubxb) + nbub_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, bubxb) end if - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms - - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R - - if (avg_state == 2) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, nb - R0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, rs(i)) - R0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, rs(i)) - - V0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, vs(i)) - V0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, vs(i)) - if (.not. polytropic .and. .not. qbmm) then - P0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, ps(i)) - P0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, ps(i)) - end if - end do - + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nb if (.not. qbmm) then - if (adv_n) then - nbub_L = qL_prim_rs${XYZ}$_vf(j, k, l, n_idx) - nbub_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, n_idx) + if (polytropic) then + pbw_L(i) = f_cpbw_KM(R0(i), R0_L(i), V0_L(i), 0._wp) + pbw_R(i) = f_cpbw_KM(R0(i), R0_R(i), V0_R(i), 0._wp) else - nbub_L_denom = 0._wp - nbub_R_denom = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, nb - nbub_L_denom = nbub_L_denom + (R0_L(i)**3._wp)*weight(i) - nbub_R_denom = nbub_R_denom + (R0_R(i)**3._wp)*weight(i) - end do - nbub_L = (3._wp/(4._wp*pi))*qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)/nbub_L_denom - nbub_R = (3._wp/(4._wp*pi))*qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)/nbub_R_denom + pbw_L(i) = f_cpbw_KM(R0(i), R0_L(i), V0_L(i), P0_L(i)) + pbw_R(i) = f_cpbw_KM(R0(i), R0_R(i), V0_R(i), P0_R(i)) end if - else - !nb stored in 0th moment of first R0 bin in variable conversion module - nbub_L = qL_prim_rs${XYZ}$_vf(j, k, l, bubxb) - nbub_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, bubxb) end if + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, nb - if (.not. qbmm) then - if (polytropic) then - pbw_L(i) = f_cpbw_KM(R0(i), R0_L(i), V0_L(i), 0._wp) - pbw_R(i) = f_cpbw_KM(R0(i), R0_R(i), V0_R(i), 0._wp) - else - pbw_L(i) = f_cpbw_KM(R0(i), R0_L(i), V0_L(i), P0_L(i)) - pbw_R(i) = f_cpbw_KM(R0(i), R0_R(i), V0_R(i), P0_R(i)) - end if - end if - end do + if (qbmm) then + PbwR3Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 4) + PbwR3Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 4) - if (qbmm) then - PbwR3Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 4) - PbwR3Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 4) + R3Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 1) + R3Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 1) - R3Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 1) - R3Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 1) + R3V2Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 3) + R3V2Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 3) + else - R3V2Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 3) - R3V2Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 3) - else + PbwR3Lbar = 0._wp + PbwR3Rbar = 0._wp - PbwR3Lbar = 0._wp - PbwR3Rbar = 0._wp + R3Lbar = 0._wp + R3Rbar = 0._wp - R3Lbar = 0._wp - R3Rbar = 0._wp + R3V2Lbar = 0._wp + R3V2Rbar = 0._wp - R3V2Lbar = 0._wp - R3V2Rbar = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nb + PbwR3Lbar = PbwR3Lbar + pbw_L(i)*(R0_L(i)**3._wp)*weight(i) + PbwR3Rbar = PbwR3Rbar + pbw_R(i)*(R0_R(i)**3._wp)*weight(i) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, nb - PbwR3Lbar = PbwR3Lbar + pbw_L(i)*(R0_L(i)**3._wp)*weight(i) - PbwR3Rbar = PbwR3Rbar + pbw_R(i)*(R0_R(i)**3._wp)*weight(i) + R3Lbar = R3Lbar + (R0_L(i)**3._wp)*weight(i) + R3Rbar = R3Rbar + (R0_R(i)**3._wp)*weight(i) - R3Lbar = R3Lbar + (R0_L(i)**3._wp)*weight(i) - R3Rbar = R3Rbar + (R0_R(i)**3._wp)*weight(i) + R3V2Lbar = R3V2Lbar + (R0_L(i)**3._wp)*(V0_L(i)**2._wp)*weight(i) + R3V2Rbar = R3V2Rbar + (R0_R(i)**3._wp)*(V0_R(i)**2._wp)*weight(i) + end do + end if - R3V2Lbar = R3V2Lbar + (R0_L(i)**3._wp)*(V0_L(i)**2._wp)*weight(i) - R3V2Rbar = R3V2Rbar + (R0_R(i)**3._wp)*(V0_R(i)**2._wp)*weight(i) - end do - end if + if (qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids) < small_alf .or. R3Lbar < small_alf) then + ptilde_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)*pres_L + else + ptilde_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)*(pres_L - PbwR3Lbar/R3Lbar - & + rho_L*R3V2Lbar/R3Lbar) + end if - if (qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids) < small_alf .or. R3Lbar < small_alf) then - ptilde_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)*pres_L - else - ptilde_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)*(pres_L - PbwR3Lbar/R3Lbar - & - rho_L*R3V2Lbar/R3Lbar) - end if + if (qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids) < small_alf .or. R3Rbar < small_alf) then + ptilde_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)*pres_R + else + ptilde_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)*(pres_R - PbwR3Rbar/R3Rbar - & + rho_R*R3V2Rbar/R3Rbar) + end if - if (qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids) < small_alf .or. R3Rbar < small_alf) then - ptilde_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)*pres_R - else - ptilde_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)*(pres_R - PbwR3Rbar/R3Rbar - & - rho_R*R3V2Rbar/R3Rbar) - end if + if ((.not. f_approx_equal(ptilde_L, ptilde_L)) .or. (.not. f_approx_equal(ptilde_R, ptilde_R))) then + end if - if ((.not. f_approx_equal(ptilde_L, ptilde_L)) .or. (.not. f_approx_equal(ptilde_R, ptilde_R))) then - end if + rho_avg = 5.e-1_wp*(rho_L + rho_R) + H_avg = 5.e-1_wp*(H_L + H_R) + gamma_avg = 5.e-1_wp*(gamma_L + gamma_R) + vel_avg_rms = 0._wp - rho_avg = 5.e-1_wp*(rho_L + rho_R) - H_avg = 5.e-1_wp*(H_L + H_R) - gamma_avg = 5.e-1_wp*(gamma_L + gamma_R) - vel_avg_rms = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_avg_rms = vel_avg_rms + (5.e-1_wp*(vel_L(i) + vel_R(i)))**2._wp + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_avg_rms = vel_avg_rms + (5.e-1_wp*(vel_L(i) + vel_R(i)))**2._wp - end do + end if - end if + call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & + vel_L_rms, 0._wp, c_L) - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & - vel_L_rms, 0._wp, c_L) + call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & + vel_R_rms, 0._wp, c_R) - call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & - vel_R_rms, 0._wp, c_R) + !> The computation of c_avg does not require all the variables, and therefore the non '_avg' + ! variables are placeholders to call the subroutine. + call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & + vel_avg_rms, 0._wp, c_avg) - !> The computation of c_avg does not require all the variables, and therefore the non '_avg' - ! variables are placeholders to call the subroutine. - call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & - vel_avg_rms, 0._wp, c_avg) + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) + end do + end if - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) - end do - end if + ! Low Mach correction + if (low_Mach == 2) then + @:compute_low_Mach_correction() + end if - ! Low Mach correction - if (low_Mach == 2) then - @:compute_low_Mach_correction() - end if + if (wave_speeds == 1) then + s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) + s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) - if (wave_speeds == 1) then - s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) - s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) + s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & + (s_L - vel_L(dir_idx(1))) - & + rho_R*vel_R(dir_idx(1))* & + (s_R - vel_R(dir_idx(1)))) & + /(rho_L*(s_L - vel_L(dir_idx(1))) - & + rho_R*(s_R - vel_R(dir_idx(1)))) + elseif (wave_speeds == 2) then + pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & + (vel_L(dir_idx(1)) - & + vel_R(dir_idx(1)))) - s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & - (s_L - vel_L(dir_idx(1))) - & - rho_R*vel_R(dir_idx(1))* & - (s_R - vel_R(dir_idx(1)))) & - /(rho_L*(s_L - vel_L(dir_idx(1))) - & - rho_R*(s_R - vel_R(dir_idx(1)))) - elseif (wave_speeds == 2) then - pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & - (vel_L(dir_idx(1)) - & - vel_R(dir_idx(1)))) - - pres_SR = pres_SL - - Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & - (pres_SL/pres_L - 1._wp)*pres_L/ & - ((pres_L + pi_inf_L/(1._wp + gamma_L))))) - Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & - (pres_SR/pres_R - 1._wp)*pres_R/ & - ((pres_R + pi_inf_R/(1._wp + gamma_R))))) - - s_L = vel_L(dir_idx(1)) - c_L*Ms_L - s_R = vel_R(dir_idx(1)) + c_R*Ms_R - - s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & - (pres_L - pres_R)/ & - (rho_avg*c_avg)) - end if + pres_SR = pres_SL - ! follows Einfeldt et al. - ! s_M/P = min/max(0.,s_L/R) - s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) + Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & + (pres_SL/pres_L - 1._wp)*pres_L/ & + ((pres_L + pi_inf_L/(1._wp + gamma_L))))) + Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & + (pres_SR/pres_R - 1._wp)*pres_R/ & + ((pres_R + pi_inf_R/(1._wp + gamma_R))))) + + s_L = vel_L(dir_idx(1)) - c_L*Ms_L + s_R = vel_R(dir_idx(1)) + c_R*Ms_R - ! goes with q_star_L/R = xi_L/R * (variable) - ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) - xi_L = (s_L - vel_L(dir_idx(1)))/(s_L - s_S) - xi_R = (s_R - vel_R(dir_idx(1)))/(s_R - s_S) + s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & + (pres_L - pres_R)/ & + (rho_avg*c_avg)) + end if - ! goes with numerical velocity in x/y/z directions - ! xi_P/M = 0.5 +/m sgn(0.5,s_star) - xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S)) - xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S)) + ! follows Einfeldt et al. + ! s_M/P = min/max(0.,s_L/R) + s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) - ! Low Mach correction - if (low_Mach == 1) then - @:compute_low_Mach_correction() - else - pcorr = 0._wp - end if + ! goes with q_star_L/R = xi_L/R * (variable) + ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) + xi_L = (s_L - vel_L(dir_idx(1)))/(s_L - s_S) + xi_R = (s_R - vel_R(dir_idx(1)))/(s_R - s_S) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) - end do + ! goes with numerical velocity in x/y/z directions + ! xi_P/M = 0.5 +/m sgn(0.5,s_star) + xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S)) + xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S)) - if (bubbles_euler .and. (num_fluids > 1)) then - ! Kill mass transport @ gas density - flux_rs${XYZ}$_vf(j, k, l, contxe) = 0._wp - end if + ! Low Mach correction + if (low_Mach == 1) then + @:compute_low_Mach_correction() + else + pcorr = 0._wp + end if - ! Momentum flux. - ! f = \rho u u + p I, q = \rho u, q_star = \xi * \rho*(s_star, v, w) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end do - ! Include p_tilde + if (bubbles_euler .and. (num_fluids > 1)) then + ! Kill mass transport @ gas density + flux_rs${XYZ}$_vf(j, k, l, contxe) = 0._wp + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - xi_M*(rho_L*(vel_L(dir_idx(1))* & - vel_L(dir_idx(i)) + & - s_M*(xi_L*(dir_flg(dir_idx(i))*s_S + & - (1._wp - dir_flg(dir_idx(i)))* & - vel_L(dir_idx(i))) - vel_L(dir_idx(i)))) + & - dir_flg(dir_idx(i))*(pres_L - ptilde_L)) & - + xi_P*(rho_R*(vel_R(dir_idx(1))* & - vel_R(dir_idx(i)) + & - s_P*(xi_R*(dir_flg(dir_idx(i))*s_S + & - (1._wp - dir_flg(dir_idx(i)))* & - vel_R(dir_idx(i))) - vel_R(dir_idx(i)))) + & - dir_flg(dir_idx(i))*(pres_R - ptilde_R)) & - + (s_M/s_L)*(s_P/s_R)*dir_flg(dir_idx(i))*pcorr - end do + ! Momentum flux. + ! f = \rho u u + p I, q = \rho u, q_star = \xi * \rho*(s_star, v, w) - ! Energy flux. - ! f = u*(E+p), q = E, q_star = \xi*E+(s-u)(\rho s_star + p/(s-u)) - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - xi_M*(vel_L(dir_idx(1))*(E_L + pres_L - ptilde_L) + & - s_M*(xi_L*(E_L + (s_S - vel_L(dir_idx(1)))* & - (rho_L*s_S + (pres_L - ptilde_L)/ & - (s_L - vel_L(dir_idx(1))))) - E_L)) & - + xi_P*(vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) + & - s_P*(xi_R*(E_R + (s_S - vel_R(dir_idx(1)))* & - (rho_R*s_S + (pres_R - ptilde_R)/ & - (s_R - vel_R(dir_idx(1))))) - E_R)) & - + (s_M/s_L)*(s_P/s_R)*pcorr*s_S - - ! Volume fraction flux - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) - end do + ! Include p_tilde - ! Source for volume fraction advection equation - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = & - xi_M*(vel_L(dir_idx(i)) + & - dir_flg(dir_idx(i))* & - s_M*(xi_L - 1._wp)) & - + xi_P*(vel_R(dir_idx(i)) + & - dir_flg(dir_idx(i))* & - s_P*(xi_R - 1._wp)) - - !IF ( (model_eqns == 4) .or. (num_fluids==1) ) vel_src_rs_vf(idxi)%sf(j,k,l) = 0._wp - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + xi_M*(rho_L*(vel_L(dir_idx(1))* & + vel_L(dir_idx(i)) + & + s_M*(xi_L*(dir_flg(dir_idx(i))*s_S + & + (1._wp - dir_flg(dir_idx(i)))* & + vel_L(dir_idx(i))) - vel_L(dir_idx(i)))) + & + dir_flg(dir_idx(i))*(pres_L - ptilde_L)) & + + xi_P*(rho_R*(vel_R(dir_idx(1))* & + vel_R(dir_idx(i)) + & + s_P*(xi_R*(dir_flg(dir_idx(i))*s_S + & + (1._wp - dir_flg(dir_idx(i)))* & + vel_R(dir_idx(i))) - vel_R(dir_idx(i)))) + & + dir_flg(dir_idx(i))*(pres_R - ptilde_R)) & + + (s_M/s_L)*(s_P/s_R)*dir_flg(dir_idx(i))*pcorr + end do - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(1)) + ! Energy flux. + ! f = u*(E+p), q = E, q_star = \xi*E+(s-u)(\rho s_star + p/(s-u)) + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + xi_M*(vel_L(dir_idx(1))*(E_L + pres_L - ptilde_L) + & + s_M*(xi_L*(E_L + (s_S - vel_L(dir_idx(1)))* & + (rho_L*s_S + (pres_L - ptilde_L)/ & + (s_L - vel_L(dir_idx(1))))) - E_L)) & + + xi_P*(vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) + & + s_P*(xi_R*(E_R + (s_S - vel_R(dir_idx(1)))* & + (rho_R*s_S + (pres_R - ptilde_R)/ & + (s_R - vel_R(dir_idx(1))))) - E_R)) & + + (s_M/s_L)*(s_P/s_R)*pcorr*s_S + + ! Volume fraction flux + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end do - ! Add advection flux for bubble variables - $:GPU_LOOP(parallelism='[seq]') - do i = bubxb, bubxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*nbub_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) - end do + ! Source for volume fraction advection equation + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = & + xi_M*(vel_L(dir_idx(i)) + & + dir_flg(dir_idx(i))* & + s_M*(xi_L - 1._wp)) & + + xi_P*(vel_R(dir_idx(i)) + & + dir_flg(dir_idx(i))* & + s_P*(xi_R - 1._wp)) + + !IF ( (model_eqns == 4) .or. (num_fluids==1) ) vel_src_rs_vf(idxi)%sf(j,k,l) = 0._wp + end do - if (qbmm) then - flux_rs${XYZ}$_vf(j, k, l, bubxb) = & - xi_M*nbub_L & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*nbub_R & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) - end if + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(1)) - if (adv_n) then - flux_rs${XYZ}$_vf(j, k, l, n_idx) = & - xi_M*nbub_L & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*nbub_R & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + ! Add advection flux for bubble variables + $:GPU_LOOP(parallelism='[seq]') + do i = bubxb, bubxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*nbub_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end do + + if (qbmm) then + flux_rs${XYZ}$_vf(j, k, l, bubxb) = & + xi_M*nbub_L & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*nbub_R & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end if + + if (adv_n) then + flux_rs${XYZ}$_vf(j, k, l, n_idx) = & + xi_M*nbub_L & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*nbub_R & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end if + + ! Geometrical source flux for cylindrical coordinates + #:if (NORM_DIR == 2) + if (cyl_coord) then + ! Substituting the advective flux into the inviscid geometrical source flux + $:GPU_LOOP(parallelism='[seq]') + do i = 1, E_idx + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + ! Recalculating the radial momentum geometric source flux + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(1)) = & + xi_M*(rho_L*(vel_L(dir_idx(1))* & + vel_L(dir_idx(1)) + & + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & + + xi_P*(rho_R*(vel_R(dir_idx(1))* & + vel_R(dir_idx(1)) + & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + ! Geometrical source of the void fraction(s) is zero + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do end if + #:endif + #:if (NORM_DIR == 3) + if (grid_geometry == 3) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, sys_size + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do - ! Geometrical source flux for cylindrical coordinates - #:if (NORM_DIR == 2) - if (cyl_coord) then - ! Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP(parallelism='[seq]') - do i = 1, E_idx - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(1)) = & - xi_M*(rho_L*(vel_L(dir_idx(1))* & - vel_L(dir_idx(1)) + & - s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - + xi_P*(rho_R*(vel_R(dir_idx(1))* & - vel_R(dir_idx(1)) + & - s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) - ! Geometrical source of the void fraction(s) is zero - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - end if - #:endif - #:if (NORM_DIR == 3) - if (grid_geometry == 3) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, sys_size - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb + 1) = & - -xi_M*(rho_L*(vel_L(dir_idx(1))* & - vel_L(dir_idx(1)) + & - s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - - xi_P*(rho_R*(vel_R(dir_idx(1))* & - vel_R(dir_idx(1)) + & - s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb + 1) = & + -xi_M*(rho_L*(vel_L(dir_idx(1))* & + vel_L(dir_idx(1)) + & + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & + - xi_P*(rho_R*(vel_R(dir_idx(1))* & + vel_R(dir_idx(1)) + & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) - end if - #:endif - end do + end if + #:endif end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() else ! 5-EQUATION MODEL WITH HLLC - #:call GPU_PARALLEL_LOOP(collapse=3, private='[T_L, T_R, vel_L, vel_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, alpha_L, alpha_R, s_L, s_R, s_S, vel_avg_rms, pcorr, zcoef, vel_L_tmp, vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, tau_e_L, tau_e_R, xi_field_L, xi_field_R, Yi_avg,Phi_avg, h_iL, h_iR, h_avg_2]', copyin='[is1, is2, is3]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,T_L, T_R, vel_L, vel_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, alpha_L, alpha_R, s_L, s_R, s_S, vel_avg_rms, pcorr, zcoef, vel_L_tmp, vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, tau_e_L, tau_e_R, xi_field_L, xi_field_R, Yi_avg,Phi_avg, h_iL, h_iR, h_avg_2]', copyin='[is1, is2, is3]') + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + + !idx1 = 1; if (dir_idx(1) == 2) idx1 = 2; if (dir_idx(1) == 3) idx1 = 3 + + vel_L_rms = 0._wp; vel_R_rms = 0._wp + rho_L = 0._wp; rho_R = 0._wp + gamma_L = 0._wp; gamma_R = 0._wp + pi_inf_L = 0._wp; pi_inf_R = 0._wp + qv_L = 0._wp; qv_R = 0._wp + alpha_L_sum = 0._wp; alpha_R_sum = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do - !idx1 = 1; if (dir_idx(1) == 2) idx1 = 2; if (dir_idx(1) == 3) idx1 = 3 + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp + end do - vel_L_rms = 0._wp; vel_R_rms = 0._wp - rho_L = 0._wp; rho_R = 0._wp - gamma_L = 0._wp; gamma_R = 0._wp - pi_inf_L = 0._wp; pi_inf_R = 0._wp - qv_L = 0._wp; qv_R = 0._wp - alpha_L_sum = 0._wp; alpha_R_sum = 0._wp + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + ! Change this by splitting it into the cases + ! present in the bubbles_euler + if (mpp_lim) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) + alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) end do $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp + do i = 1, num_fluids + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) end do + end if - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - - ! Change this by splitting it into the cases - ! present in the bubbles_euler - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) - alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) - end do - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) + gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) + pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) + qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) + + rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) + pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) + qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) + end do + if (viscous) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) - gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) + do i = 1, 2 + Re_L(i) = dflt_real + Re_R(i) = dflt_real - rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) - end do + if (Re_size(i) > 0) Re_L(i) = 0._wp + if (Re_size(i) > 0) Re_R(i) = 0._wp - if (viscous) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real - - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & - + Re_L(i) - Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & - + Re_R(i) - end do - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + do q = 1, Re_size(i) + Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & + + Re_L(i) + Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & + + Re_R(i) end do - end if + Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + end do + end if - if (chemistry) then - c_sum_Yi_Phi = 0.0_wp - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - end do + if (chemistry) then + c_sum_Yi_Phi = 0.0_wp + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - call get_mixture_molecular_weight(Ys_L, MW_L) - call get_mixture_molecular_weight(Ys_R, MW_R) + call get_mixture_molecular_weight(Ys_L, MW_L) + call get_mixture_molecular_weight(Ys_R, MW_R) - Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) - Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) + Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) + Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) - R_gas_L = gas_constant/MW_L - R_gas_R = gas_constant/MW_R + R_gas_L = gas_constant/MW_L + R_gas_R = gas_constant/MW_R - T_L = pres_L/rho_L/R_gas_L - T_R = pres_R/rho_R/R_gas_R + T_L = pres_L/rho_L/R_gas_L + T_R = pres_R/rho_R/R_gas_R - call get_species_specific_heats_r(T_L, Cp_iL) - call get_species_specific_heats_r(T_R, Cp_iR) + call get_species_specific_heats_r(T_L, Cp_iL) + call get_species_specific_heats_r(T_R, Cp_iR) - if (chem_params%gamma_method == 1) then - !> gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. - Gamma_iL = Cp_iL/(Cp_iL - 1.0_wp) - Gamma_iR = Cp_iR/(Cp_iR - 1.0_wp) + if (chem_params%gamma_method == 1) then + !> gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. + Gamma_iL = Cp_iL/(Cp_iL - 1.0_wp) + Gamma_iR = Cp_iR/(Cp_iR - 1.0_wp) - gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) - gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) - else if (chem_params%gamma_method == 2) then - !> gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. - call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) - call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) - call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) - call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) + gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) + gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) + else if (chem_params%gamma_method == 2) then + !> gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. + call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) + call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) + call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) + call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) - Gamm_L = Cp_L/Cv_L; Gamm_R = Cp_R/Cv_R - gamma_L = 1.0_wp/(Gamm_L - 1.0_wp); gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) - end if + Gamm_L = Cp_L/Cv_L; Gamm_R = Cp_R/Cv_R + gamma_L = 1.0_wp/(Gamm_L - 1.0_wp); gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) + end if - call get_mixture_energy_mass(T_L, Ys_L, E_L) - call get_mixture_energy_mass(T_R, Ys_R, E_R) + call get_mixture_energy_mass(T_L, Ys_L, E_L) + call get_mixture_energy_mass(T_R, Ys_R, E_R) - E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms - E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R - else - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1*rho_L*vel_L_rms + qv_L - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1*rho_R*vel_R_rms + qv_R + E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms + E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + else + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1*rho_L*vel_L_rms + qv_L + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1*rho_R*vel_R_rms + qv_R - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R - end if + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + end if - ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - end do - G_L = 0._wp - G_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - ! Elastic contribution to energy if G large enough - if ((G_L > verysmall) .and. (G_R > verysmall)) then + ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY + if (hypoelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, strxe - strxb + 1 + tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + end do + G_L = 0._wp + G_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) + end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, strxe - strxb + 1 + ! Elastic contribution to energy if G large enough + if ((G_L > verysmall) .and. (G_R > verysmall)) then + E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) + E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) + ! Additional terms in 2D and 3D + if ((i == 2) .or. (i == 4) .or. (i == 5)) then E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - ! Additional terms in 2D and 3D - if ((i == 2) .or. (i == 4) .or. (i == 5)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - end if end if - end do - end if - - ! ENERGY ADJUSTMENTS FOR HYPERELASTIC ENERGY - if (hyperelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) - xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) - end do - G_L = 0._wp - G_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - ! Mixture left and right shear modulus - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) - end do - ! Elastic contribution to energy if G large enough - if (G_L > verysmall .and. G_R > verysmall) then - E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) - E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, b_size - 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - end do + end do + end if + + ! ENERGY ADJUSTMENTS FOR HYPERELASTIC ENERGY + if (hyperelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) + xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) + end do + G_L = 0._wp + G_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + ! Mixture left and right shear modulus + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) + end do + ! Elastic contribution to energy if G large enough + if (G_L > verysmall .and. G_R > verysmall) then + E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) + E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, b_size - 1 + tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + end do + end if - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R - @:compute_average_state() + @:compute_average_state() - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & - vel_L_rms, 0._wp, c_L) + call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & + vel_L_rms, 0._wp, c_L) - call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & - vel_R_rms, 0._wp, c_R) + call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & + vel_R_rms, 0._wp, c_R) - !> The computation of c_avg does not require all the variables, and therefore the non '_avg' - ! variables are placeholders to call the subroutine. - call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & - vel_avg_rms, c_sum_Yi_Phi, c_avg) + !> The computation of c_avg does not require all the variables, and therefore the non '_avg' + ! variables are placeholders to call the subroutine. + call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & + vel_avg_rms, c_sum_Yi_Phi, c_avg) - if (viscous) then - if (chemistry) then - call compute_viscosity_and_inversion(T_L, Ys_L, T_R, Ys_R, Re_L(1), Re_R(1)) - end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) - end do + if (viscous) then + if (chemistry) then + call compute_viscosity_and_inversion(T_L, Ys_L, T_R, Ys_R, Re_L(1), Re_R(1)) end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) + end do + end if - ! Low Mach correction - if (low_Mach == 2) then - @:compute_low_Mach_correction() - end if + ! Low Mach correction + if (low_Mach == 2) then + @:compute_low_Mach_correction() + end if - if (wave_speeds == 1) then - if (elasticity) then - s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + & - (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L), vel_R(dir_idx(1)) - sqrt(c_R*c_R + & - (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R)) - s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + & - (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R), vel_L(dir_idx(1)) + sqrt(c_L*c_L + & - (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L)) - s_S = (pres_R - tau_e_R(dir_idx_tau(1)) - pres_L + & - tau_e_L(dir_idx_tau(1)) + rho_L*vel_L(idx1)*(s_L - vel_L(idx1)) - & - rho_R*vel_R(idx1)*(s_R - vel_R(idx1)))/(rho_L*(s_L - vel_L(idx1)) - & - rho_R*(s_R - vel_R(idx1))) - else - s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) - s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) - s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & - (s_L - vel_L(dir_idx(1))) - rho_R*vel_R(dir_idx(1))*(s_R - vel_R(dir_idx(1)))) & - /(rho_L*(s_L - vel_L(dir_idx(1))) - rho_R*(s_R - vel_R(dir_idx(1)))) + if (wave_speeds == 1) then + if (elasticity) then + s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + & + (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L), vel_R(dir_idx(1)) - sqrt(c_R*c_R + & + (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R)) + s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + & + (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R), vel_L(dir_idx(1)) + sqrt(c_L*c_L + & + (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L)) + s_S = (pres_R - tau_e_R(dir_idx_tau(1)) - pres_L + & + tau_e_L(dir_idx_tau(1)) + rho_L*vel_L(idx1)*(s_L - vel_L(idx1)) - & + rho_R*vel_R(idx1)*(s_R - vel_R(idx1)))/(rho_L*(s_L - vel_L(idx1)) - & + rho_R*(s_R - vel_R(idx1))) + else + s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) + s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) + s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & + (s_L - vel_L(dir_idx(1))) - rho_R*vel_R(dir_idx(1))*(s_R - vel_R(dir_idx(1)))) & + /(rho_L*(s_L - vel_L(dir_idx(1))) - rho_R*(s_R - vel_R(dir_idx(1)))) - end if - elseif (wave_speeds == 2) then - pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & - (vel_L(idx1) - & - vel_R(idx1))) - - pres_SR = pres_SL - - Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & - (pres_SL/pres_L - 1._wp)*pres_L/ & - ((pres_L + pi_inf_L/(1._wp + gamma_L))))) - Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & - (pres_SR/pres_R - 1._wp)*pres_R/ & - ((pres_R + pi_inf_R/(1._wp + gamma_R))))) - - s_L = vel_L(idx1) - c_L*Ms_L - s_R = vel_R(idx1) + c_R*Ms_R - - s_S = 5.e-1_wp*((vel_L(idx1) + vel_R(idx1)) + & - (pres_L - pres_R)/ & - (rho_avg*c_avg)) end if + elseif (wave_speeds == 2) then + pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & + (vel_L(idx1) - & + vel_R(idx1))) - ! follows Einfeldt et al. - ! s_M/P = min/max(0.,s_L/R) - s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) + pres_SR = pres_SL - ! goes with q_star_L/R = xi_L/R * (variable) - ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) - xi_L = (s_L - vel_L(idx1))/(s_L - s_S) - xi_R = (s_R - vel_R(idx1))/(s_R - s_S) + Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & + (pres_SL/pres_L - 1._wp)*pres_L/ & + ((pres_L + pi_inf_L/(1._wp + gamma_L))))) + Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & + (pres_SR/pres_R - 1._wp)*pres_R/ & + ((pres_R + pi_inf_R/(1._wp + gamma_R))))) - ! goes with numerical velocity in x/y/z directions - ! xi_P/M = 0.5 +/m sgn(0.5,s_star) - xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S)) - xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S)) + s_L = vel_L(idx1) - c_L*Ms_L + s_R = vel_R(idx1) + c_R*Ms_R - ! Low Mach correction - if (low_Mach == 1) then - @:compute_low_Mach_correction() - else - pcorr = 0._wp - end if + s_S = 5.e-1_wp*((vel_L(idx1) + vel_R(idx1)) + & + (pres_L - pres_R)/ & + (rho_avg*c_avg)) + end if - ! COMPUTING THE HLLC FLUXES - ! MASS FLUX. - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(idx1) + s_M*(xi_L - 1._wp)) & - + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(idx1) + s_P*(xi_R - 1._wp)) - end do + ! follows Einfeldt et al. + ! s_M/P = min/max(0.,s_L/R) + s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) + + ! goes with q_star_L/R = xi_L/R * (variable) + ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) + xi_L = (s_L - vel_L(idx1))/(s_L - s_S) + xi_R = (s_R - vel_R(idx1))/(s_R - s_S) + + ! goes with numerical velocity in x/y/z directions + ! xi_P/M = 0.5 +/m sgn(0.5,s_star) + xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S)) + xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S)) + + ! Low Mach correction + if (low_Mach == 1) then + @:compute_low_Mach_correction() + else + pcorr = 0._wp + end if + + ! COMPUTING THE HLLC FLUXES + ! MASS FLUX. + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(idx1) + s_M*(xi_L - 1._wp)) & + + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(idx1) + s_P*(xi_R - 1._wp)) + end do - ! MOMENTUM FLUX. - ! f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w) + ! MOMENTUM FLUX. + ! f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + idxi = dir_idx(i) + flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = & + xi_M*(rho_L*(vel_L(idx1)* & + vel_L(idxi) + & + s_M*(xi_L*(dir_flg(idxi)*s_S + & + (1._wp - dir_flg(idxi))* & + vel_L(idxi)) - vel_L(idxi))) + & + dir_flg(idxi)*(pres_L)) & + + xi_P*(rho_R*(vel_R(idx1)* & + vel_R(idxi) + & + s_P*(xi_R*(dir_flg(idxi)*s_S + & + (1._wp - dir_flg(idxi))* & + vel_R(idxi)) - vel_R(idxi))) + & + dir_flg(idxi)*(pres_R)) & + + (s_M/s_L)*(s_P/s_R)*dir_flg(idxi)*pcorr + end do + + ! ENERGY FLUX. + ! f = u*(E-\sigma), q = E, q_star = \xi*E+(s-u)(\rho s_star - \sigma/(s-u)) + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + xi_M*(vel_L(idx1)*(E_L + pres_L) + & + s_M*(xi_L*(E_L + (s_S - vel_L(idx1))* & + (rho_L*s_S + pres_L/ & + (s_L - vel_L(idx1)))) - E_L)) & + + xi_P*(vel_R(idx1)*(E_R + pres_R) + & + s_P*(xi_R*(E_R + (s_S - vel_R(idx1))* & + (rho_R*s_S + pres_R/ & + (s_R - vel_R(idx1)))) - E_R)) & + + (s_M/s_L)*(s_P/s_R)*pcorr*s_S + + ! ELASTICITY. Elastic shear stress additions for the momentum and energy flux + if (elasticity) then + flux_ene_e = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims idxi = dir_idx(i) + ! MOMENTUM ELASTIC FLUX. flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = & - xi_M*(rho_L*(vel_L(idx1)* & - vel_L(idxi) + & - s_M*(xi_L*(dir_flg(idxi)*s_S + & - (1._wp - dir_flg(idxi))* & - vel_L(idxi)) - vel_L(idxi))) + & - dir_flg(idxi)*(pres_L)) & - + xi_P*(rho_R*(vel_R(idx1)* & - vel_R(idxi) + & - s_P*(xi_R*(dir_flg(idxi)*s_S + & - (1._wp - dir_flg(idxi))* & - vel_R(idxi)) - vel_R(idxi))) + & - dir_flg(idxi)*(pres_R)) & - + (s_M/s_L)*(s_P/s_R)*dir_flg(idxi)*pcorr + flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) & + - xi_M*tau_e_L(dir_idx_tau(i)) - xi_P*tau_e_R(dir_idx_tau(i)) + ! ENERGY ELASTIC FLUX. + flux_ene_e = flux_ene_e - & + xi_M*(vel_L(idxi)*tau_e_L(dir_idx_tau(i)) + & + s_M*(xi_L*((s_S - vel_L(i))*(tau_e_L(dir_idx_tau(i))/(s_L - vel_L(i)))))) - & + xi_P*(vel_R(idxi)*tau_e_R(dir_idx_tau(i)) + & + s_P*(xi_R*((s_S - vel_R(i))*(tau_e_R(dir_idx_tau(i))/(s_R - vel_R(i)))))) end do + flux_rs${XYZ}$_vf(j, k, l, E_idx) = flux_rs${XYZ}$_vf(j, k, l, E_idx) + flux_ene_e + end if - ! ENERGY FLUX. - ! f = u*(E-\sigma), q = E, q_star = \xi*E+(s-u)(\rho s_star - \sigma/(s-u)) - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - xi_M*(vel_L(idx1)*(E_L + pres_L) + & - s_M*(xi_L*(E_L + (s_S - vel_L(idx1))* & - (rho_L*s_S + pres_L/ & - (s_L - vel_L(idx1)))) - E_L)) & - + xi_P*(vel_R(idx1)*(E_R + pres_R) + & - s_P*(xi_R*(E_R + (s_S - vel_R(idx1))* & - (rho_R*s_S + pres_R/ & - (s_R - vel_R(idx1)))) - E_R)) & - + (s_M/s_L)*(s_P/s_R)*pcorr*s_S - - ! ELASTICITY. Elastic shear stress additions for the momentum and energy flux - if (elasticity) then - flux_ene_e = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - idxi = dir_idx(i) - ! MOMENTUM ELASTIC FLUX. - flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = & - flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) & - - xi_M*tau_e_L(dir_idx_tau(i)) - xi_P*tau_e_R(dir_idx_tau(i)) - ! ENERGY ELASTIC FLUX. - flux_ene_e = flux_ene_e - & - xi_M*(vel_L(idxi)*tau_e_L(dir_idx_tau(i)) + & - s_M*(xi_L*((s_S - vel_L(i))*(tau_e_L(dir_idx_tau(i))/(s_L - vel_L(i)))))) - & - xi_P*(vel_R(idxi)*tau_e_R(dir_idx_tau(i)) + & - s_P*(xi_R*((s_S - vel_R(i))*(tau_e_R(dir_idx_tau(i))/(s_R - vel_R(i)))))) - end do - flux_rs${XYZ}$_vf(j, k, l, E_idx) = flux_rs${XYZ}$_vf(j, k, l, E_idx) + flux_ene_e - end if - - ! HYPOELASTIC STRESS EVOLUTION FLUX. - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & - xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(idx1)*tau_e_L(i)) + & - xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*tau_e_R(i) - rho_R*vel_R(idx1)*tau_e_R(i)) - end do - end if - - ! VOLUME FRACTION FLUX. + ! HYPOELASTIC STRESS EVOLUTION FLUX. + if (hypoelasticity) then $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(idx1) + s_M*(xi_L - 1._wp)) & - + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(idx1) + s_P*(xi_R - 1._wp)) + do i = 1, strxe - strxb + 1 + flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & + xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(idx1)*tau_e_L(i)) + & + xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*tau_e_R(i) - rho_R*vel_R(idx1)*tau_e_R(i)) end do + end if + + ! VOLUME FRACTION FLUX. + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(idx1) + s_M*(xi_L - 1._wp)) & + + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(idx1) + s_P*(xi_R - 1._wp)) + end do + + ! VOLUME FRACTION SOURCE FLUX. + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + idxi = dir_idx(i) + vel_src_rs${XYZ}$_vf(j, k, l, idxi) = & + xi_M*(vel_L(idxi) + & + dir_flg(idxi)* & + s_M*(xi_L - 1._wp)) & + + xi_P*(vel_R(idxi) + & + dir_flg(idxi)* & + s_P*(xi_R - 1._wp)) + end do + + ! COLOR FUNCTION FLUX + if (surface_tension) then + flux_rs${XYZ}$_vf(j, k, l, c_idx) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, c_idx) & + *(vel_L(idx1) + s_M*(xi_L - 1._wp)) & + + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, c_idx) & + *(vel_R(idx1) + s_P*(xi_R - 1._wp)) + end if - ! VOLUME FRACTION SOURCE FLUX. + ! REFERENCE MAP FLUX. + if (hyperelasticity) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - idxi = dir_idx(i) - vel_src_rs${XYZ}$_vf(j, k, l, idxi) = & - xi_M*(vel_L(idxi) + & - dir_flg(idxi)* & - s_M*(xi_L - 1._wp)) & - + xi_P*(vel_R(idxi) + & - dir_flg(idxi)* & - s_P*(xi_R - 1._wp)) + flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = & + xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) & + - rho_L*vel_L(idx1)*xi_field_L(i)) + & + xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*xi_field_R(i) & + - rho_R*vel_R(idx1)*xi_field_R(i)) end do + end if - ! COLOR FUNCTION FLUX - if (surface_tension) then - flux_rs${XYZ}$_vf(j, k, l, c_idx) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, c_idx) & - *(vel_L(idx1) + s_M*(xi_L - 1._wp)) & - + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, c_idx) & - *(vel_R(idx1) + s_P*(xi_R - 1._wp)) - end if + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, idx1) - ! REFERENCE MAP FLUX. - if (hyperelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = & - xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) & - - rho_L*vel_L(idx1)*xi_field_L(i)) + & - xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*xi_field_R(i) & - - rho_R*vel_R(idx1)*xi_field_R(i)) - end do - end if + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) + Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, idx1) + flux_rs${XYZ}$_vf(j, k, l, i) = xi_M*rho_L*Y_L*(vel_L(idx1) + s_M*(xi_L - 1._wp)) & + + xi_P*rho_R*Y_R*(vel_R(idx1) + s_P*(xi_R - 1._wp)) + flux_src_rs${XYZ}$_vf(j, k, l, i) = 0.0_wp + end do + end if - if (chemistry) then + ! Geometrical source flux for cylindrical coordinates + #:if (NORM_DIR == 2) + if (cyl_coord) then + !Substituting the advective flux into the inviscid geometrical source flux $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - - flux_rs${XYZ}$_vf(j, k, l, i) = xi_M*rho_L*Y_L*(vel_L(idx1) + s_M*(xi_L - 1._wp)) & - + xi_P*rho_R*Y_R*(vel_R(idx1) + s_P*(xi_R - 1._wp)) - flux_src_rs${XYZ}$_vf(j, k, l, i) = 0.0_wp + do i = 1, E_idx + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + ! Recalculating the radial momentum geometric source flux + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + idx1) = & + xi_M*(rho_L*(vel_L(idx1)* & + vel_L(idx1) + & + s_M*(xi_L*(dir_flg(idx1)*s_S + & + (1._wp - dir_flg(idx1))* & + vel_L(idx1)) - vel_L(idx1)))) & + + xi_P*(rho_R*(vel_R(idx1)* & + vel_R(idx1) + & + s_P*(xi_R*(dir_flg(idx1)*s_S + & + (1._wp - dir_flg(idx1))* & + vel_R(idx1)) - vel_R(idx1)))) + ! Geometrical source of the void fraction(s) is zero + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do end if + #:endif + #:if (NORM_DIR == 3) + if (grid_geometry == 3) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, sys_size + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do - ! Geometrical source flux for cylindrical coordinates - #:if (NORM_DIR == 2) - if (cyl_coord) then - !Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP(parallelism='[seq]') - do i = 1, E_idx - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + idx1) = & - xi_M*(rho_L*(vel_L(idx1)* & - vel_L(idx1) + & - s_M*(xi_L*(dir_flg(idx1)*s_S + & - (1._wp - dir_flg(idx1))* & - vel_L(idx1)) - vel_L(idx1)))) & - + xi_P*(rho_R*(vel_R(idx1)* & - vel_R(idx1) + & - s_P*(xi_R*(dir_flg(idx1)*s_S + & - (1._wp - dir_flg(idx1))* & - vel_R(idx1)) - vel_R(idx1)))) - ! Geometrical source of the void fraction(s) is zero - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - end if - #:endif - #:if (NORM_DIR == 3) - if (grid_geometry == 3) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, sys_size - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb + 1) = & - -xi_M*(rho_L*(vel_L(idx1)* & - vel_L(idx1) + & - s_M*(xi_L*(dir_flg(idx1)*s_S + & - (1._wp - dir_flg(idx1))* & - vel_L(idx1)) - vel_L(idx1)))) & - - xi_P*(rho_R*(vel_R(idx1)* & - vel_R(idx1) + & - s_P*(xi_R*(dir_flg(idx1)*s_S + & - (1._wp - dir_flg(idx1))* & - vel_R(idx1)) - vel_R(idx1)))) - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb + 1) = & + -xi_M*(rho_L*(vel_L(idx1)* & + vel_L(idx1) + & + s_M*(xi_L*(dir_flg(idx1)*s_S + & + (1._wp - dir_flg(idx1))* & + vel_L(idx1)) - vel_L(idx1)))) & + - xi_P*(rho_R*(vel_R(idx1)* & + vel_R(idx1) + & + s_P*(xi_R*(dir_flg(idx1)*s_S + & + (1._wp - dir_flg(idx1))* & + vel_R(idx1)) - vel_R(idx1)))) + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) - end if - #:endif + end if + #:endif - end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end if #:endfor @@ -3655,179 +3655,179 @@ contains #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (norm_dir == ${NORM_DIR}$) then #:block UNDEF_AMD - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel, alpha_L, alpha_R, rho, pres,E, H_no_mag, gamma, pi_inf, qv, vel_rms, B, c, c_fast, pres_mag, U_L, U_R, U_starL, U_starR, U_doubleL, U_doubleR, F_L, F_R, F_starL, F_starR, F_hlld]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - - ! (1) Extract the left/right primitive states - do i = 1, contxe - alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - end do + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_rho_L, alpha_rho_R, vel, alpha_L, alpha_R, rho, pres,E, H_no_mag, gamma, pi_inf, qv, vel_rms, B, c, c_fast, pres_mag, U_L, U_R, U_starL, U_starR, U_doubleL, U_doubleR, F_L, F_R, F_starL, F_starR, F_hlld]') + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end - ! NOTE: unlike HLL & HLLC, vel_L here is permutated by dir_idx for simpler logic - do i = 1, num_vels - vel%L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) - vel%R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + dir_idx(i)) - end do + ! (1) Extract the left/right primitive states + do i = 1, contxe + alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - vel_rms%L = sum(vel%L**2._wp) - vel_rms%R = sum(vel%R**2._wp) + ! NOTE: unlike HLL & HLLC, vel_L here is permutated by dir_idx for simpler logic + do i = 1, num_vels + vel%L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) + vel%R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + dir_idx(i)) + end do - do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do + vel_rms%L = sum(vel%L**2._wp) + vel_rms%R = sum(vel%R**2._wp) - pres%L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres%R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - - ! NOTE: unlike HLL, Bx, By, Bz are permutated by dir_idx for simpler logic - if (mhd) then - if (n == 0) then ! 1D: constant Bx; By, Bz as variables; only in x so not permutated - B%L = [Bx0, qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg), qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1)] - B%R = [Bx0, qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg), qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1)] - else ! 2D/3D: Bx, By, Bz as variables - B%L = [qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(1) - 1), & - qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(2) - 1), & - qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(3) - 1)] - B%R = [qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(1) - 1), & - qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(2) - 1), & - qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(3) - 1)] - end if - end if + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do - ! Sum properties of all fluid components - rho%L = 0._wp; gamma%L = 0._wp; pi_inf%L = 0._wp; qv%L = 0._wp - rho%R = 0._wp; gamma%R = 0._wp; pi_inf%R = 0._wp; qv%R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho%L = rho%L + alpha_rho_L(i) - gamma%L = gamma%L + alpha_L(i)*gammas(i) - pi_inf%L = pi_inf%L + alpha_L(i)*pi_infs(i) - qv%L = qv%L + alpha_rho_L(i)*qvs(i) - - rho%R = rho%R + alpha_rho_R(i) - gamma%R = gamma%R + alpha_R(i)*gammas(i) - pi_inf%R = pi_inf%R + alpha_R(i)*pi_infs(i) - qv%R = qv%R + alpha_rho_R(i)*qvs(i) - end do + pres%L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres%R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - pres_mag%L = 0.5_wp*sum(B%L**2._wp) - pres_mag%R = 0.5_wp*sum(B%R**2._wp) - E%L = gamma%L*pres%L + pi_inf%L + 0.5_wp*rho%L*vel_rms%L + qv%L + pres_mag%L - E%R = gamma%R*pres%R + pi_inf%R + 0.5_wp*rho%R*vel_rms%R + qv%R + pres_mag%R ! includes magnetic energy - H_no_mag%L = (E%L + pres%L - pres_mag%L)/rho%L - H_no_mag%R = (E%R + pres%R - pres_mag%R)/rho%R ! stagnation enthalpy here excludes magnetic energy (only used to find speed of sound) - - ! (2) Compute fast wave speeds - call s_compute_speed_of_sound(pres%L, rho%L, gamma%L, pi_inf%L, H_no_mag%L, alpha_L, vel_rms%L, 0._wp, c%L) - call s_compute_speed_of_sound(pres%R, rho%R, gamma%R, pi_inf%R, H_no_mag%R, alpha_R, vel_rms%R, 0._wp, c%R) - call s_compute_fast_magnetosonic_speed(rho%L, c%L, B%L, norm_dir, c_fast%L, H_no_mag%L) - call s_compute_fast_magnetosonic_speed(rho%R, c%R, B%R, norm_dir, c_fast%R, H_no_mag%R) - - ! (3) Compute contact speed s_M [Miyoshi Equ. (38)] - s_L = min(vel%L(1) - c_fast%L, vel%R(1) - c_fast%R) - s_R = max(vel%R(1) + c_fast%R, vel%L(1) + c_fast%L) - - pTot_L = pres%L + pres_mag%L - pTot_R = pres%R + pres_mag%R - - s_M = (((s_R - vel%R(1))*rho%R*vel%R(1) - & - (s_L - vel%L(1))*rho%L*vel%L(1) - pTot_R + pTot_L)/ & - ((s_R - vel%R(1))*rho%R - (s_L - vel%L(1))*rho%L)) - - ! (4) Compute star state variables - rhoL_star = rho%L*(s_L - vel%L(1))/(s_L - s_M) - rhoR_star = rho%R*(s_R - vel%R(1))/(s_R - s_M) - p_star = pTot_L + rho%L*(s_L - vel%L(1))*(s_M - vel%L(1))/(s_L - s_M) - E_starL = ((s_L - vel%L(1))*E%L - pTot_L*vel%L(1) + p_star*s_M)/(s_L - s_M) - E_starR = ((s_R - vel%R(1))*E%R - pTot_R*vel%R(1) + p_star*s_M)/(s_R - s_M) - - ! (5) Compute left/right state vectors and fluxes - U_L = [rho%L, rho%L*vel%L(1:3), B%L(2:3), E%L] - U_starL = [rhoL_star, rhoL_star*s_M, rhoL_star*vel%L(2:3), B%L(2:3), E_starL] - U_R = [rho%R, rho%R*vel%R(1:3), B%R(2:3), E%R] - U_starR = [rhoR_star, rhoR_star*s_M, rhoR_star*vel%R(2:3), B%R(2:3), E_starR] - - ! Compute the left/right fluxes - F_L(1) = U_L(2) - F_L(2) = U_L(2)*vel%L(1) - B%L(1)*B%L(1) + pTot_L - F_L(3:4) = U_L(2)*vel%L(2:3) - B%L(1)*B%L(2:3) - F_L(5:6) = vel%L(1)*B%L(2:3) - vel%L(2:3)*B%L(1) - F_L(7) = (E%L + pTot_L)*vel%L(1) - B%L(1)*(vel%L(1)*B%L(1) + vel%L(2)*B%L(2) + vel%L(3)*B%L(3)) - - F_R(1) = U_R(2) - F_R(2) = U_R(2)*vel%R(1) - B%R(1)*B%R(1) + pTot_R - F_R(3:4) = U_R(2)*vel%R(2:3) - B%R(1)*B%R(2:3) - F_R(5:6) = vel%R(1)*B%R(2:3) - vel%R(2:3)*B%R(1) - F_R(7) = (E%R + pTot_R)*vel%R(1) - B%R(1)*(vel%R(1)*B%R(1) + vel%R(2)*B%R(2) + vel%R(3)*B%R(3)) - ! Compute the star flux using HLL relation - F_starL = F_L + s_L*(U_starL - U_L) - F_starR = F_R + s_R*(U_starR - U_R) - ! Compute the rotational (Alfvén) speeds - s_starL = s_M - abs(B%L(1))/sqrt(rhoL_star) - s_starR = s_M + abs(B%L(1))/sqrt(rhoR_star) - ! Compute the double–star states [Miyoshi Eqns. (59)-(62)] - sqrt_rhoL_star = sqrt(rhoL_star); sqrt_rhoR_star = sqrt(rhoR_star) - vL_star = vel%L(2); wL_star = vel%L(3) - vR_star = vel%R(2); wR_star = vel%R(3) - - ! (6) Compute the double–star states [Miyoshi Eqns. (59)-(62)] - denom_ds = sqrt_rhoL_star + sqrt_rhoR_star - sign_Bx = sign(1._wp, B%L(1)) - v_double = (sqrt_rhoL_star*vL_star + sqrt_rhoR_star*vR_star + (B%R(2) - B%L(2))*sign_Bx)/denom_ds - w_double = (sqrt_rhoL_star*wL_star + sqrt_rhoR_star*wR_star + (B%R(3) - B%L(3))*sign_Bx)/denom_ds - By_double = (sqrt_rhoL_star*B%R(2) + sqrt_rhoR_star*B%L(2) + sqrt_rhoL_star*sqrt_rhoR_star*(vR_star - vL_star)*sign_Bx)/denom_ds - Bz_double = (sqrt_rhoL_star*B%R(3) + sqrt_rhoR_star*B%L(3) + sqrt_rhoL_star*sqrt_rhoR_star*(wR_star - wL_star)*sign_Bx)/denom_ds - - E_doubleL = E_starL - sqrt_rhoL_star*((vL_star*B%L(2) + wL_star*B%L(3)) - (v_double*By_double + w_double*Bz_double))*sign_Bx - E_doubleR = E_starR + sqrt_rhoR_star*((vR_star*B%R(2) + wR_star*B%R(3)) - (v_double*By_double + w_double*Bz_double))*sign_Bx - E_double = 0.5_wp*(E_doubleL + E_doubleR) - - U_doubleL = [rhoL_star, rhoL_star*s_M, rhoL_star*v_double, rhoL_star*w_double, By_double, Bz_double, E_double] - U_doubleR = [rhoR_star, rhoR_star*s_M, rhoR_star*v_double, rhoR_star*w_double, By_double, Bz_double, E_double] - - ! (11) Choose HLLD flux based on wave-speed regions - if (0.0_wp <= s_L) then - F_hlld = F_L - else if (0.0_wp <= s_starL) then - F_hlld = F_L + s_L*(U_starL - U_L) - else if (0.0_wp <= s_M) then - F_hlld = F_starL + s_starL*(U_doubleL - U_starL) - else if (0.0_wp <= s_starR) then - F_hlld = F_starR + s_starR*(U_doubleR - U_starR) - else if (0.0_wp <= s_R) then - F_hlld = F_R + s_R*(U_starR - U_R) - else - F_hlld = F_R + ! NOTE: unlike HLL, Bx, By, Bz are permutated by dir_idx for simpler logic + if (mhd) then + if (n == 0) then ! 1D: constant Bx; By, Bz as variables; only in x so not permutated + B%L = [Bx0, qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg), qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1)] + B%R = [Bx0, qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg), qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1)] + else ! 2D/3D: Bx, By, Bz as variables + B%L = [qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(1) - 1), & + qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(2) - 1), & + qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(3) - 1)] + B%R = [qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(1) - 1), & + qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(2) - 1), & + qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(3) - 1)] end if + end if - ! (12) Reorder and write temporary variables to the flux array - ! Mass - flux_rs${XYZ}$_vf(j, k, l, 1) = F_hlld(1) ! TODO multi-component - ! Momentum - flux_rs${XYZ}$_vf(j, k, l, [contxe + dir_idx(1), contxe + dir_idx(2), contxe + dir_idx(3)]) = F_hlld([2, 3, 4]) - ! Magnetic field - if (n == 0) then - flux_rs${XYZ}$_vf(j, k, l, [B_idx%beg, B_idx%beg + 1]) = F_hlld([5, 6]) - else - flux_rs${XYZ}$_vf(j, k, l, [B_idx%beg + dir_idx(2) - 1, B_idx%beg + dir_idx(3) - 1]) = F_hlld([5, 6]) - end if - ! Energy - flux_rs${XYZ}$_vf(j, k, l, E_idx) = F_hlld(7) - ! Partial fraction - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf(j, k, l, i) = 0._wp ! TODO multi-component (zero for now) - end do + ! Sum properties of all fluid components + rho%L = 0._wp; gamma%L = 0._wp; pi_inf%L = 0._wp; qv%L = 0._wp + rho%R = 0._wp; gamma%R = 0._wp; pi_inf%R = 0._wp; qv%R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho%L = rho%L + alpha_rho_L(i) + gamma%L = gamma%L + alpha_L(i)*gammas(i) + pi_inf%L = pi_inf%L + alpha_L(i)*pi_infs(i) + qv%L = qv%L + alpha_rho_L(i)*qvs(i) + + rho%R = rho%R + alpha_rho_R(i) + gamma%R = gamma%R + alpha_R(i)*gammas(i) + pi_inf%R = pi_inf%R + alpha_R(i)*pi_infs(i) + qv%R = qv%R + alpha_rho_R(i)*qvs(i) + end do + + pres_mag%L = 0.5_wp*sum(B%L**2._wp) + pres_mag%R = 0.5_wp*sum(B%R**2._wp) + E%L = gamma%L*pres%L + pi_inf%L + 0.5_wp*rho%L*vel_rms%L + qv%L + pres_mag%L + E%R = gamma%R*pres%R + pi_inf%R + 0.5_wp*rho%R*vel_rms%R + qv%R + pres_mag%R ! includes magnetic energy + H_no_mag%L = (E%L + pres%L - pres_mag%L)/rho%L + H_no_mag%R = (E%R + pres%R - pres_mag%R)/rho%R ! stagnation enthalpy here excludes magnetic energy (only used to find speed of sound) + + ! (2) Compute fast wave speeds + call s_compute_speed_of_sound(pres%L, rho%L, gamma%L, pi_inf%L, H_no_mag%L, alpha_L, vel_rms%L, 0._wp, c%L) + call s_compute_speed_of_sound(pres%R, rho%R, gamma%R, pi_inf%R, H_no_mag%R, alpha_R, vel_rms%R, 0._wp, c%R) + call s_compute_fast_magnetosonic_speed(rho%L, c%L, B%L, norm_dir, c_fast%L, H_no_mag%L) + call s_compute_fast_magnetosonic_speed(rho%R, c%R, B%R, norm_dir, c_fast%R, H_no_mag%R) + + ! (3) Compute contact speed s_M [Miyoshi Equ. (38)] + s_L = min(vel%L(1) - c_fast%L, vel%R(1) - c_fast%R) + s_R = max(vel%R(1) + c_fast%R, vel%L(1) + c_fast%L) + + pTot_L = pres%L + pres_mag%L + pTot_R = pres%R + pres_mag%R + + s_M = (((s_R - vel%R(1))*rho%R*vel%R(1) - & + (s_L - vel%L(1))*rho%L*vel%L(1) - pTot_R + pTot_L)/ & + ((s_R - vel%R(1))*rho%R - (s_L - vel%L(1))*rho%L)) + + ! (4) Compute star state variables + rhoL_star = rho%L*(s_L - vel%L(1))/(s_L - s_M) + rhoR_star = rho%R*(s_R - vel%R(1))/(s_R - s_M) + p_star = pTot_L + rho%L*(s_L - vel%L(1))*(s_M - vel%L(1))/(s_L - s_M) + E_starL = ((s_L - vel%L(1))*E%L - pTot_L*vel%L(1) + p_star*s_M)/(s_L - s_M) + E_starR = ((s_R - vel%R(1))*E%R - pTot_R*vel%R(1) + p_star*s_M)/(s_R - s_M) + + ! (5) Compute left/right state vectors and fluxes + U_L = [rho%L, rho%L*vel%L(1:3), B%L(2:3), E%L] + U_starL = [rhoL_star, rhoL_star*s_M, rhoL_star*vel%L(2:3), B%L(2:3), E_starL] + U_R = [rho%R, rho%R*vel%R(1:3), B%R(2:3), E%R] + U_starR = [rhoR_star, rhoR_star*s_M, rhoR_star*vel%R(2:3), B%R(2:3), E_starR] + + ! Compute the left/right fluxes + F_L(1) = U_L(2) + F_L(2) = U_L(2)*vel%L(1) - B%L(1)*B%L(1) + pTot_L + F_L(3:4) = U_L(2)*vel%L(2:3) - B%L(1)*B%L(2:3) + F_L(5:6) = vel%L(1)*B%L(2:3) - vel%L(2:3)*B%L(1) + F_L(7) = (E%L + pTot_L)*vel%L(1) - B%L(1)*(vel%L(1)*B%L(1) + vel%L(2)*B%L(2) + vel%L(3)*B%L(3)) + + F_R(1) = U_R(2) + F_R(2) = U_R(2)*vel%R(1) - B%R(1)*B%R(1) + pTot_R + F_R(3:4) = U_R(2)*vel%R(2:3) - B%R(1)*B%R(2:3) + F_R(5:6) = vel%R(1)*B%R(2:3) - vel%R(2:3)*B%R(1) + F_R(7) = (E%R + pTot_R)*vel%R(1) - B%R(1)*(vel%R(1)*B%R(1) + vel%R(2)*B%R(2) + vel%R(3)*B%R(3)) + ! Compute the star flux using HLL relation + F_starL = F_L + s_L*(U_starL - U_L) + F_starR = F_R + s_R*(U_starR - U_R) + ! Compute the rotational (Alfvén) speeds + s_starL = s_M - abs(B%L(1))/sqrt(rhoL_star) + s_starR = s_M + abs(B%L(1))/sqrt(rhoR_star) + ! Compute the double–star states [Miyoshi Eqns. (59)-(62)] + sqrt_rhoL_star = sqrt(rhoL_star); sqrt_rhoR_star = sqrt(rhoR_star) + vL_star = vel%L(2); wL_star = vel%L(3) + vR_star = vel%R(2); wR_star = vel%R(3) + + ! (6) Compute the double–star states [Miyoshi Eqns. (59)-(62)] + denom_ds = sqrt_rhoL_star + sqrt_rhoR_star + sign_Bx = sign(1._wp, B%L(1)) + v_double = (sqrt_rhoL_star*vL_star + sqrt_rhoR_star*vR_star + (B%R(2) - B%L(2))*sign_Bx)/denom_ds + w_double = (sqrt_rhoL_star*wL_star + sqrt_rhoR_star*wR_star + (B%R(3) - B%L(3))*sign_Bx)/denom_ds + By_double = (sqrt_rhoL_star*B%R(2) + sqrt_rhoR_star*B%L(2) + sqrt_rhoL_star*sqrt_rhoR_star*(vR_star - vL_star)*sign_Bx)/denom_ds + Bz_double = (sqrt_rhoL_star*B%R(3) + sqrt_rhoR_star*B%L(3) + sqrt_rhoL_star*sqrt_rhoR_star*(wR_star - wL_star)*sign_Bx)/denom_ds + + E_doubleL = E_starL - sqrt_rhoL_star*((vL_star*B%L(2) + wL_star*B%L(3)) - (v_double*By_double + w_double*Bz_double))*sign_Bx + E_doubleR = E_starR + sqrt_rhoR_star*((vR_star*B%R(2) + wR_star*B%R(3)) - (v_double*By_double + w_double*Bz_double))*sign_Bx + E_double = 0.5_wp*(E_doubleL + E_doubleR) + + U_doubleL = [rhoL_star, rhoL_star*s_M, rhoL_star*v_double, rhoL_star*w_double, By_double, Bz_double, E_double] + U_doubleR = [rhoR_star, rhoR_star*s_M, rhoR_star*v_double, rhoR_star*w_double, By_double, Bz_double, E_double] + + ! (11) Choose HLLD flux based on wave-speed regions + if (0.0_wp <= s_L) then + F_hlld = F_L + else if (0.0_wp <= s_starL) then + F_hlld = F_L + s_L*(U_starL - U_L) + else if (0.0_wp <= s_M) then + F_hlld = F_starL + s_starL*(U_doubleL - U_starL) + else if (0.0_wp <= s_starR) then + F_hlld = F_starR + s_starR*(U_doubleR - U_starR) + else if (0.0_wp <= s_R) then + F_hlld = F_R + s_R*(U_starR - U_R) + else + F_hlld = F_R + end if - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = 0._wp + ! (12) Reorder and write temporary variables to the flux array + ! Mass + flux_rs${XYZ}$_vf(j, k, l, 1) = F_hlld(1) ! TODO multi-component + ! Momentum + flux_rs${XYZ}$_vf(j, k, l, [contxe + dir_idx(1), contxe + dir_idx(2), contxe + dir_idx(3)]) = F_hlld([2, 3, 4]) + ! Magnetic field + if (n == 0) then + flux_rs${XYZ}$_vf(j, k, l, [B_idx%beg, B_idx%beg + 1]) = F_hlld([5, 6]) + else + flux_rs${XYZ}$_vf(j, k, l, [B_idx%beg + dir_idx(2) - 1, B_idx%beg + dir_idx(3) - 1]) = F_hlld([5, 6]) + end if + ! Energy + flux_rs${XYZ}$_vf(j, k, l, E_idx) = F_hlld(7) + ! Partial fraction + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf(j, k, l, i) = 0._wp ! TODO multi-component (zero for now) end do + + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = 0._wp end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() #:endblock UNDEF_AMD end if #:endfor @@ -4031,55 +4031,55 @@ contains if (norm_dir == 1) then if (bc_x%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning - #:call GPU_PARALLEL_LOOP(collapse=3) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qL_prim_rsx_vf(-1, k, l, i) = & - qR_prim_rsx_vf(0, k, l, i) - end do + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + qL_prim_rsx_vf(-1, k, l, i) = & + qR_prim_rsx_vf(0, k, l, i) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (viscous) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) + do i = momxb, momxe + do l = isz%beg, isz%end + do k = isy%beg, isy%end + + dqL_prim_dx_vf(i)%sf(-1, k, l) = & + dqR_prim_dx_vf(i)%sf(0, k, l) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + if (n > 0) then + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end - dqL_prim_dx_vf(i)%sf(-1, k, l) = & - dqR_prim_dx_vf(i)%sf(0, k, l) + dqL_prim_dy_vf(i)%sf(-1, k, l) = & + dqR_prim_dy_vf(i)%sf(0, k, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() - if (n > 0) then - #:call GPU_PARALLEL_LOOP(collapse=3) + if (p > 0) then + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end - dqL_prim_dy_vf(i)%sf(-1, k, l) = & - dqR_prim_dy_vf(i)%sf(0, k, l) + dqL_prim_dz_vf(i)%sf(-1, k, l) = & + dqR_prim_dz_vf(i)%sf(0, k, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP - - if (p > 0) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do k = isy%beg, isy%end - - dqL_prim_dz_vf(i)%sf(-1, k, l) = & - dqR_prim_dz_vf(i)%sf(0, k, l) - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if @@ -4090,56 +4090,56 @@ contains if (bc_x%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end - #:call GPU_PARALLEL_LOOP(collapse=3) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qR_prim_rsx_vf(m + 1, k, l, i) = & - qL_prim_rsx_vf(m, k, l, i) - end do + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + qR_prim_rsx_vf(m + 1, k, l, i) = & + qL_prim_rsx_vf(m, k, l, i) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (viscous) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) + do i = momxb, momxe + do l = isz%beg, isz%end + do k = isy%beg, isy%end + + dqR_prim_dx_vf(i)%sf(m + 1, k, l) = & + dqL_prim_dx_vf(i)%sf(m, k, l) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + if (n > 0) then + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end - dqR_prim_dx_vf(i)%sf(m + 1, k, l) = & - dqL_prim_dx_vf(i)%sf(m, k, l) + dqR_prim_dy_vf(i)%sf(m + 1, k, l) = & + dqL_prim_dy_vf(i)%sf(m, k, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() - if (n > 0) then - #:call GPU_PARALLEL_LOOP(collapse=3) + if (p > 0) then + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end - dqR_prim_dy_vf(i)%sf(m + 1, k, l) = & - dqL_prim_dy_vf(i)%sf(m, k, l) + dqR_prim_dz_vf(i)%sf(m + 1, k, l) = & + dqL_prim_dz_vf(i)%sf(m, k, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP - - if (p > 0) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do k = isy%beg, isy%end - - dqR_prim_dz_vf(i)%sf(m + 1, k, l) = & - dqL_prim_dz_vf(i)%sf(m, k, l) - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if @@ -4153,52 +4153,52 @@ contains elseif (norm_dir == 2) then if (bc_y%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning - #:call GPU_PARALLEL_LOOP(collapse=3) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qL_prim_rsy_vf(-1, k, l, i) = & - qR_prim_rsy_vf(0, k, l, i) - end do + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + qL_prim_rsy_vf(-1, k, l, i) = & + qR_prim_rsy_vf(0, k, l, i) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (viscous) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do j = isx%beg, isx%end - dqL_prim_dx_vf(i)%sf(j, -1, l) = & - dqR_prim_dx_vf(i)%sf(j, 0, l) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=3) + do i = momxb, momxe + do l = isz%beg, isz%end + do j = isx%beg, isx%end + dqL_prim_dx_vf(i)%sf(j, -1, l) = & + dqR_prim_dx_vf(i)%sf(j, 0, l) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=3) + do i = momxb, momxe + do l = isz%beg, isz%end + do j = isx%beg, isx%end + dqL_prim_dy_vf(i)%sf(j, -1, l) = & + dqR_prim_dy_vf(i)%sf(j, 0, l) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) + if (p > 0) then + $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end - dqL_prim_dy_vf(i)%sf(j, -1, l) = & - dqR_prim_dy_vf(i)%sf(j, 0, l) + dqL_prim_dz_vf(i)%sf(j, -1, l) = & + dqR_prim_dz_vf(i)%sf(j, 0, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP - - if (p > 0) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do j = isx%beg, isx%end - dqL_prim_dz_vf(i)%sf(j, -1, l) = & - dqR_prim_dz_vf(i)%sf(j, 0, l) - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if @@ -4207,52 +4207,52 @@ contains if (bc_y%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end - #:call GPU_PARALLEL_LOOP(collapse=3) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qR_prim_rsy_vf(n + 1, k, l, i) = & - qL_prim_rsy_vf(n, k, l, i) - end do + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + qR_prim_rsy_vf(n + 1, k, l, i) = & + qL_prim_rsy_vf(n, k, l, i) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (viscous) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do j = isx%beg, isx%end - dqR_prim_dx_vf(i)%sf(j, n + 1, l) = & - dqL_prim_dx_vf(i)%sf(j, n, l) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=3) + do i = momxb, momxe + do l = isz%beg, isz%end + do j = isx%beg, isx%end + dqR_prim_dx_vf(i)%sf(j, n + 1, l) = & + dqL_prim_dx_vf(i)%sf(j, n, l) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=3) + do i = momxb, momxe + do l = isz%beg, isz%end + do j = isx%beg, isx%end + dqR_prim_dy_vf(i)%sf(j, n + 1, l) = & + dqL_prim_dy_vf(i)%sf(j, n, l) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) + if (p > 0) then + $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end - dqR_prim_dy_vf(i)%sf(j, n + 1, l) = & - dqL_prim_dy_vf(i)%sf(j, n, l) + dqR_prim_dz_vf(i)%sf(j, n + 1, l) = & + dqL_prim_dz_vf(i)%sf(j, n, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP - - if (p > 0) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do j = isx%beg, isx%end - dqR_prim_dz_vf(i)%sf(j, n + 1, l) = & - dqL_prim_dz_vf(i)%sf(j, n, l) - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if @@ -4264,98 +4264,98 @@ contains else if (bc_z%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning - #:call GPU_PARALLEL_LOOP(collapse=3) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qL_prim_rsz_vf(-1, k, l, i) = & - qR_prim_rsz_vf(0, k, l, i) - end do + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + qL_prim_rsz_vf(-1, k, l, i) = & + qR_prim_rsz_vf(0, k, l, i) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (viscous) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do k = isy%beg, isy%end - do j = isx%beg, isx%end - dqL_prim_dx_vf(i)%sf(j, k, -1) = & - dqR_prim_dx_vf(i)%sf(j, k, 0) - end do + $:GPU_PARALLEL_LOOP(private='[i,k,j]', collapse=3) + do i = momxb, momxe + do k = isy%beg, isy%end + do j = isx%beg, isx%end + dqL_prim_dx_vf(i)%sf(j, k, -1) = & + dqR_prim_dx_vf(i)%sf(j, k, 0) end do end do - #:endcall GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do k = isy%beg, isy%end - do j = isx%beg, isx%end - dqL_prim_dy_vf(i)%sf(j, k, -1) = & - dqR_prim_dy_vf(i)%sf(j, k, 0) - end do + end do + $:END_GPU_PARALLEL_LOOP() + $:GPU_PARALLEL_LOOP(private='[i,k,j]', collapse=3) + do i = momxb, momxe + do k = isy%beg, isy%end + do j = isx%beg, isx%end + dqL_prim_dy_vf(i)%sf(j, k, -1) = & + dqR_prim_dy_vf(i)%sf(j, k, 0) end do end do - #:endcall GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do k = isy%beg, isy%end - do j = isx%beg, isx%end - dqL_prim_dz_vf(i)%sf(j, k, -1) = & - dqR_prim_dz_vf(i)%sf(j, k, 0) - end do + end do + $:END_GPU_PARALLEL_LOOP() + $:GPU_PARALLEL_LOOP(private='[i,k,j]', collapse=3) + do i = momxb, momxe + do k = isy%beg, isy%end + do j = isx%beg, isx%end + dqL_prim_dz_vf(i)%sf(j, k, -1) = & + dqR_prim_dz_vf(i)%sf(j, k, 0) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end if if (bc_z%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end - #:call GPU_PARALLEL_LOOP(collapse=3) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qR_prim_rsz_vf(p + 1, k, l, i) = & - qL_prim_rsz_vf(p, k, l, i) - end do + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + qR_prim_rsz_vf(p + 1, k, l, i) = & + qL_prim_rsz_vf(p, k, l, i) end do end do - #:endcall GPU_PARALLEL_LOOP - - if (viscous) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do k = isy%beg, isy%end - do j = isx%beg, isx%end - dqR_prim_dx_vf(i)%sf(j, k, p + 1) = & - dqL_prim_dx_vf(i)%sf(j, k, p) - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do k = isy%beg, isy%end - do j = isx%beg, isx%end - dqR_prim_dy_vf(i)%sf(j, k, p + 1) = & - dqL_prim_dy_vf(i)%sf(j, k, p) - end do + if (viscous) then + $:GPU_PARALLEL_LOOP(private='[i,k,j]', collapse=3) + do i = momxb, momxe + do k = isy%beg, isy%end + do j = isx%beg, isx%end + dqR_prim_dx_vf(i)%sf(j, k, p + 1) = & + dqL_prim_dx_vf(i)%sf(j, k, p) end do end do - #:endcall GPU_PARALLEL_LOOP - - #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do k = isy%beg, isy%end - do j = isx%beg, isx%end - dqR_prim_dz_vf(i)%sf(j, k, p + 1) = & - dqL_prim_dz_vf(i)%sf(j, k, p) - end do + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(private='[i,k,j]', collapse=3) + do i = momxb, momxe + do k = isy%beg, isy%end + do j = isx%beg, isx%end + dqR_prim_dy_vf(i)%sf(j, k, p + 1) = & + dqL_prim_dy_vf(i)%sf(j, k, p) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(private='[i,k,j]', collapse=3) + do i = momxb, momxe + do k = isy%beg, isy%end + do j = isx%beg, isx%end + dqR_prim_dz_vf(i)%sf(j, k, p + 1) = & + dqL_prim_dz_vf(i)%sf(j, k, p) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end if @@ -4399,141 +4399,141 @@ contains if (viscous .or. (surface_tension)) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = momxb, E_idx - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - flux_src_vf(i)%sf(j, k, l) = 0._wp - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = momxb, E_idx + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + flux_src_vf(i)%sf(j, k, l) = 0._wp end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (chem_params%diffusion) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = E_idx, chemxe - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - if (i == E_idx .or. i >= chemxb) then - flux_src_vf(i)%sf(j, k, l) = 0._wp - end if - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = E_idx, chemxe + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + if (i == E_idx .or. i >= chemxb) then + flux_src_vf(i)%sf(j, k, l) = 0._wp + end if end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (qbmm) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, 4 - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + 1 - mom_sp_rsx_vf(j, k, l, i) = mom_sp(i)%sf(j, k, l) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, 4 + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + 1 + mom_sp_rsx_vf(j, k, l, i) = mom_sp(i)%sf(j, k, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if ! Reshaping Inputted Data in y-direction elseif (norm_dir == 2) then if (viscous .or. (surface_tension)) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = momxb, E_idx - do l = is3%beg, is3%end - do j = is1%beg, is1%end - do k = is2%beg, is2%end - flux_src_vf(i)%sf(k, j, l) = 0._wp - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = momxb, E_idx + do l = is3%beg, is3%end + do j = is1%beg, is1%end + do k = is2%beg, is2%end + flux_src_vf(i)%sf(k, j, l) = 0._wp end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (chem_params%diffusion) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = E_idx, chemxe - do l = is3%beg, is3%end - do j = is1%beg, is1%end - do k = is2%beg, is2%end - if (i == E_idx .or. i >= chemxb) then - flux_src_vf(i)%sf(k, j, l) = 0._wp - end if - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = E_idx, chemxe + do l = is3%beg, is3%end + do j = is1%beg, is1%end + do k = is2%beg, is2%end + if (i == E_idx .or. i >= chemxb) then + flux_src_vf(i)%sf(k, j, l) = 0._wp + end if end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (qbmm) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, 4 - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + 1 - mom_sp_rsy_vf(j, k, l, i) = mom_sp(i)%sf(k, j, l) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, 4 + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + 1 + mom_sp_rsy_vf(j, k, l, i) = mom_sp(i)%sf(k, j, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if ! Reshaping Inputted Data in z-direction else if (viscous .or. (surface_tension)) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = momxb, E_idx - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end - flux_src_vf(i)%sf(l, k, j) = 0._wp - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = momxb, E_idx + do j = is1%beg, is1%end + do k = is2%beg, is2%end + do l = is3%beg, is3%end + flux_src_vf(i)%sf(l, k, j) = 0._wp end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (chem_params%diffusion) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = E_idx, chemxe - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end - if (i == E_idx .or. i >= chemxb) then - flux_src_vf(i)%sf(l, k, j) = 0._wp - end if - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = E_idx, chemxe + do j = is1%beg, is1%end + do k = is2%beg, is2%end + do l = is3%beg, is3%end + if (i == E_idx .or. i >= chemxb) then + flux_src_vf(i)%sf(l, k, j) = 0._wp + end if end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (qbmm) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, 4 - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + 1 - mom_sp_rsz_vf(j, k, l, i) = mom_sp(i)%sf(l, k, j) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, 4 + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + 1 + mom_sp_rsz_vf(j, k, l, i) = mom_sp(i)%sf(l, k, j) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end if @@ -4590,113 +4590,113 @@ contains integer :: i_vel !!< Loop iterator for velocity components. integer :: idx_rp(3) !!< Indices $(j,k,l)$ of 'right' point for averaging. - #:call GPU_PARALLEL_LOOP(collapse=3, private='[idx_rp, avg_v_int, avg_dvdx_int, avg_dvdy_int, avg_dvdz_int, Re_s, Re_b, vel_src_int, r_eff, divergence_cyl, stress_vector_shear, stress_normal_bulk, div_v_term_const]') - do l = iz%beg, iz%end - do k = iy%beg, iy%end - do j = ix%beg, ix%end + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,idx_rp, avg_v_int, avg_dvdx_int, avg_dvdy_int, avg_dvdz_int, Re_s, Re_b, vel_src_int, r_eff, divergence_cyl, stress_vector_shear, stress_normal_bulk, div_v_term_const]') + do l = iz%beg, iz%end + do k = iy%beg, iy%end + do j = ix%beg, ix%end + + ! Determine indices for the 'right' state for averaging across the interface + idx_rp = [j, k, l] + idx_rp(norm_dir) = idx_rp(norm_dir) + 1 + + ! Average velocities and their derivatives at the interface + ! For cylindrical: x-dir ~ axial (z_cyl), y-dir ~ radial (r_cyl), z-dir ~ azimuthal (theta_cyl) + $:GPU_LOOP(parallelism='[seq]') + do i_vel = 1, num_dims + avg_v_int(i_vel) = 0.5_wp*(velL_vf(i_vel)%sf(j, k, l) + velR_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) + + avg_dvdx_int(i_vel) = 0.5_wp*(dvelL_dx_vf(i_vel)%sf(j, k, l) + & + dvelR_dx_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) + if (num_dims > 1) then + avg_dvdy_int(i_vel) = 0.5_wp*(dvelL_dy_vf(i_vel)%sf(j, k, l) + & + dvelR_dy_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) + else + avg_dvdy_int(i_vel) = 0.0_wp + end if + if (num_dims > 2) then + avg_dvdz_int(i_vel) = 0.5_wp*(dvelL_dz_vf(i_vel)%sf(j, k, l) + & + dvelR_dz_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) + else + avg_dvdz_int(i_vel) = 0.0_wp + end if + end do + + ! Get Re numbers and interface velocity for viscous work + select case (norm_dir) + case (1) ! x-face (axial face in z_cyl direction) + Re_s = Re_avg_rsx_vf(j, k, l, 1) + Re_b = Re_avg_rsx_vf(j, k, l, 2) + vel_src_int = vel_src_rsx_vf(j, k, l, 1:num_dims) + r_eff = y_cc(k) + case (2) ! y-face (radial face in r_cyl direction) + Re_s = Re_avg_rsy_vf(k, j, l, 1) + Re_b = Re_avg_rsy_vf(k, j, l, 2) + vel_src_int = vel_src_rsy_vf(k, j, l, 1:num_dims) + r_eff = y_cb(k) + case (3) ! z-face (azimuthal face in theta_cyl direction) + Re_s = Re_avg_rsz_vf(l, k, j, 1) + Re_b = Re_avg_rsz_vf(l, k, j, 2) + vel_src_int = vel_src_rsz_vf(l, k, j, 1:num_dims) + r_eff = y_cc(k) + end select + + ! Divergence in cylindrical coordinates (vx=vz_cyl, vy=vr_cyl, vz=vtheta_cyl) + divergence_cyl = avg_dvdx_int(1) + avg_dvdy_int(2) + avg_v_int(2)/r_eff + if (num_dims > 2) then + divergence_cyl = divergence_cyl + avg_dvdz_int(3)/r_eff + end if - ! Determine indices for the 'right' state for averaging across the interface - idx_rp = [j, k, l] - idx_rp(norm_dir) = idx_rp(norm_dir) + 1 + stress_vector_shear = 0.0_wp + stress_normal_bulk = 0.0_wp - ! Average velocities and their derivatives at the interface - ! For cylindrical: x-dir ~ axial (z_cyl), y-dir ~ radial (r_cyl), z-dir ~ azimuthal (theta_cyl) - $:GPU_LOOP(parallelism='[seq]') - do i_vel = 1, num_dims - avg_v_int(i_vel) = 0.5_wp*(velL_vf(i_vel)%sf(j, k, l) + velR_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) + if (shear_stress) then + div_v_term_const = -(2.0_wp/3.0_wp)*divergence_cyl/Re_s - avg_dvdx_int(i_vel) = 0.5_wp*(dvelL_dx_vf(i_vel)%sf(j, k, l) + & - dvelR_dx_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) + select case (norm_dir) + case (1) ! X-face (axial normal, z_cyl) + stress_vector_shear(1) = (2.0_wp*avg_dvdx_int(1))/Re_s + div_v_term_const if (num_dims > 1) then - avg_dvdy_int(i_vel) = 0.5_wp*(dvelL_dy_vf(i_vel)%sf(j, k, l) + & - dvelR_dy_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) - else - avg_dvdy_int(i_vel) = 0.0_wp + stress_vector_shear(2) = (avg_dvdy_int(1) + avg_dvdx_int(2))/Re_s end if if (num_dims > 2) then - avg_dvdz_int(i_vel) = 0.5_wp*(dvelL_dz_vf(i_vel)%sf(j, k, l) + & - dvelR_dz_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) - else - avg_dvdz_int(i_vel) = 0.0_wp + stress_vector_shear(3) = (avg_dvdz_int(1)/r_eff + avg_dvdx_int(3))/Re_s end if - end do - - ! Get Re numbers and interface velocity for viscous work - select case (norm_dir) - case (1) ! x-face (axial face in z_cyl direction) - Re_s = Re_avg_rsx_vf(j, k, l, 1) - Re_b = Re_avg_rsx_vf(j, k, l, 2) - vel_src_int = vel_src_rsx_vf(j, k, l, 1:num_dims) - r_eff = y_cc(k) - case (2) ! y-face (radial face in r_cyl direction) - Re_s = Re_avg_rsy_vf(k, j, l, 1) - Re_b = Re_avg_rsy_vf(k, j, l, 2) - vel_src_int = vel_src_rsy_vf(k, j, l, 1:num_dims) - r_eff = y_cb(k) - case (3) ! z-face (azimuthal face in theta_cyl direction) - Re_s = Re_avg_rsz_vf(l, k, j, 1) - Re_b = Re_avg_rsz_vf(l, k, j, 2) - vel_src_int = vel_src_rsz_vf(l, k, j, 1:num_dims) - r_eff = y_cc(k) - end select - - ! Divergence in cylindrical coordinates (vx=vz_cyl, vy=vr_cyl, vz=vtheta_cyl) - divergence_cyl = avg_dvdx_int(1) + avg_dvdy_int(2) + avg_v_int(2)/r_eff - if (num_dims > 2) then - divergence_cyl = divergence_cyl + avg_dvdz_int(3)/r_eff - end if - - stress_vector_shear = 0.0_wp - stress_normal_bulk = 0.0_wp - - if (shear_stress) then - div_v_term_const = -(2.0_wp/3.0_wp)*divergence_cyl/Re_s - - select case (norm_dir) - case (1) ! X-face (axial normal, z_cyl) - stress_vector_shear(1) = (2.0_wp*avg_dvdx_int(1))/Re_s + div_v_term_const - if (num_dims > 1) then - stress_vector_shear(2) = (avg_dvdy_int(1) + avg_dvdx_int(2))/Re_s - end if - if (num_dims > 2) then - stress_vector_shear(3) = (avg_dvdz_int(1)/r_eff + avg_dvdx_int(3))/Re_s - end if - case (2) ! Y-face (radial normal, r_cyl) - if (num_dims > 1) then - stress_vector_shear(1) = (avg_dvdy_int(1) + avg_dvdx_int(2))/Re_s - stress_vector_shear(2) = (2.0_wp*avg_dvdy_int(2))/Re_s + div_v_term_const - if (num_dims > 2) then - stress_vector_shear(3) = (avg_dvdz_int(2)/r_eff - avg_v_int(3)/r_eff + avg_dvdy_int(3))/Re_s - end if - else - stress_vector_shear(1) = (2.0_wp*avg_dvdx_int(1))/Re_s + div_v_term_const - end if - case (3) ! Z-face (azimuthal normal, theta_cyl) + case (2) ! Y-face (radial normal, r_cyl) + if (num_dims > 1) then + stress_vector_shear(1) = (avg_dvdy_int(1) + avg_dvdx_int(2))/Re_s + stress_vector_shear(2) = (2.0_wp*avg_dvdy_int(2))/Re_s + div_v_term_const if (num_dims > 2) then - stress_vector_shear(1) = (avg_dvdz_int(1)/r_eff + avg_dvdx_int(3))/Re_s - stress_vector_shear(2) = (avg_dvdz_int(2)/r_eff - avg_v_int(3)/r_eff + avg_dvdy_int(3))/Re_s - stress_vector_shear(3) = (2.0_wp*(avg_dvdz_int(3)/r_eff + avg_v_int(2)/r_eff))/Re_s + div_v_term_const + stress_vector_shear(3) = (avg_dvdz_int(2)/r_eff - avg_v_int(3)/r_eff + avg_dvdy_int(3))/Re_s end if - end select + else + stress_vector_shear(1) = (2.0_wp*avg_dvdx_int(1))/Re_s + div_v_term_const + end if + case (3) ! Z-face (azimuthal normal, theta_cyl) + if (num_dims > 2) then + stress_vector_shear(1) = (avg_dvdz_int(1)/r_eff + avg_dvdx_int(3))/Re_s + stress_vector_shear(2) = (avg_dvdz_int(2)/r_eff - avg_v_int(3)/r_eff + avg_dvdy_int(3))/Re_s + stress_vector_shear(3) = (2.0_wp*(avg_dvdz_int(3)/r_eff + avg_v_int(2)/r_eff))/Re_s + div_v_term_const + end if + end select - $:GPU_LOOP(parallelism='[seq]') - do i_vel = 1, num_dims - flux_src_vf(momxb + i_vel - 1)%sf(j, k, l) = flux_src_vf(momxb + i_vel - 1)%sf(j, k, l) - stress_vector_shear(i_vel) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - vel_src_int(i_vel)*stress_vector_shear(i_vel) - end do - end if + $:GPU_LOOP(parallelism='[seq]') + do i_vel = 1, num_dims + flux_src_vf(momxb + i_vel - 1)%sf(j, k, l) = flux_src_vf(momxb + i_vel - 1)%sf(j, k, l) - stress_vector_shear(i_vel) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - vel_src_int(i_vel)*stress_vector_shear(i_vel) + end do + end if - if (bulk_stress) then - stress_normal_bulk = divergence_cyl/Re_b + if (bulk_stress) then + stress_normal_bulk = divergence_cyl/Re_b - flux_src_vf(momxb + norm_dir - 1)%sf(j, k, l) = flux_src_vf(momxb + norm_dir - 1)%sf(j, k, l) - stress_normal_bulk - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - vel_src_int(norm_dir)*stress_normal_bulk - end if + flux_src_vf(momxb + norm_dir - 1)%sf(j, k, l) = flux_src_vf(momxb + norm_dir - 1)%sf(j, k, l) - stress_normal_bulk + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - vel_src_int(norm_dir)*stress_normal_bulk + end if - end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_compute_cylindrical_viscous_source_flux @@ -4750,88 +4750,88 @@ contains real(wp) :: divergence_v !< Velocity divergence at interface. - #:call GPU_PARALLEL_LOOP(collapse=3, private='[idx_right_phys, vel_grad_avg, current_tau_shear, current_tau_bulk, vel_src_at_interface, Re_shear, Re_bulk, divergence_v, i_dim, vel_comp_idx]') - do l_loop = isz%beg, isz%end - do k_loop = isy%beg, isy%end - do j_loop = isx%beg, isx%end + $:GPU_PARALLEL_LOOP(collapse=3, private='[j_loop,k_loop,l_loop,idx_right_phys, vel_grad_avg, current_tau_shear, current_tau_bulk, vel_src_at_interface, Re_shear, Re_bulk, divergence_v, i_dim, vel_comp_idx]') + do l_loop = isz%beg, isz%end + do k_loop = isy%beg, isy%end + do j_loop = isx%beg, isx%end + + idx_right_phys(1) = j_loop + idx_right_phys(2) = k_loop + idx_right_phys(3) = l_loop + idx_right_phys(norm_dir) = idx_right_phys(norm_dir) + 1 + + vel_grad_avg = 0.0_wp + do vel_comp_idx = 1, num_dims + vel_grad_avg(vel_comp_idx, 1) = 0.5_wp*(dvelL_dx_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & + dvelR_dx_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) + if (num_dims > 1) then + vel_grad_avg(vel_comp_idx, 2) = 0.5_wp*(dvelL_dy_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & + dvelR_dy_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) + end if + if (num_dims > 2) then + vel_grad_avg(vel_comp_idx, 3) = 0.5_wp*(dvelL_dz_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & + dvelR_dz_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) + end if + end do - idx_right_phys(1) = j_loop - idx_right_phys(2) = k_loop - idx_right_phys(3) = l_loop - idx_right_phys(norm_dir) = idx_right_phys(norm_dir) + 1 + divergence_v = 0.0_wp + do i_dim = 1, num_dims + divergence_v = divergence_v + vel_grad_avg(i_dim, i_dim) + end do - vel_grad_avg = 0.0_wp - do vel_comp_idx = 1, num_dims - vel_grad_avg(vel_comp_idx, 1) = 0.5_wp*(dvelL_dx_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & - dvelR_dx_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) - if (num_dims > 1) then - vel_grad_avg(vel_comp_idx, 2) = 0.5_wp*(dvelL_dy_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & - dvelR_dy_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) - end if - if (num_dims > 2) then - vel_grad_avg(vel_comp_idx, 3) = 0.5_wp*(dvelL_dz_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & - dvelR_dz_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) - end if + vel_src_at_interface = 0.0_wp + if (norm_dir == 1) then + Re_shear = Re_avg_rsx_vf(j_loop, k_loop, l_loop, 1) + Re_bulk = Re_avg_rsx_vf(j_loop, k_loop, l_loop, 2) + do i_dim = 1, num_dims + vel_src_at_interface(i_dim) = vel_src_rsx_vf(j_loop, k_loop, l_loop, i_dim) end do - - divergence_v = 0.0_wp + else if (norm_dir == 2) then + Re_shear = Re_avg_rsy_vf(k_loop, j_loop, l_loop, 1) + Re_bulk = Re_avg_rsy_vf(k_loop, j_loop, l_loop, 2) do i_dim = 1, num_dims - divergence_v = divergence_v + vel_grad_avg(i_dim, i_dim) + vel_src_at_interface(i_dim) = vel_src_rsy_vf(k_loop, j_loop, l_loop, i_dim) end do + else + Re_shear = Re_avg_rsz_vf(l_loop, k_loop, j_loop, 1) + Re_bulk = Re_avg_rsz_vf(l_loop, k_loop, j_loop, 2) + do i_dim = 1, num_dims + vel_src_at_interface(i_dim) = vel_src_rsz_vf(l_loop, k_loop, j_loop, i_dim) + end do + end if - vel_src_at_interface = 0.0_wp - if (norm_dir == 1) then - Re_shear = Re_avg_rsx_vf(j_loop, k_loop, l_loop, 1) - Re_bulk = Re_avg_rsx_vf(j_loop, k_loop, l_loop, 2) - do i_dim = 1, num_dims - vel_src_at_interface(i_dim) = vel_src_rsx_vf(j_loop, k_loop, l_loop, i_dim) - end do - else if (norm_dir == 2) then - Re_shear = Re_avg_rsy_vf(k_loop, j_loop, l_loop, 1) - Re_bulk = Re_avg_rsy_vf(k_loop, j_loop, l_loop, 2) - do i_dim = 1, num_dims - vel_src_at_interface(i_dim) = vel_src_rsy_vf(k_loop, j_loop, l_loop, i_dim) - end do - else - Re_shear = Re_avg_rsz_vf(l_loop, k_loop, j_loop, 1) - Re_bulk = Re_avg_rsz_vf(l_loop, k_loop, j_loop, 2) - do i_dim = 1, num_dims - vel_src_at_interface(i_dim) = vel_src_rsz_vf(l_loop, k_loop, j_loop, i_dim) - end do - end if - - if (shear_stress) then - ! current_tau_shear = 0.0_wp - call s_calculate_shear_stress_tensor(vel_grad_avg, Re_shear, divergence_v, current_tau_shear) + if (shear_stress) then + ! current_tau_shear = 0.0_wp + call s_calculate_shear_stress_tensor(vel_grad_avg, Re_shear, divergence_v, current_tau_shear) - do i_dim = 1, num_dims - flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) = & - flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) - current_tau_shear(norm_dir, i_dim) + do i_dim = 1, num_dims + flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) = & + flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) - current_tau_shear(norm_dir, i_dim) - flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) = & - flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) - & - vel_src_at_interface(i_dim)*current_tau_shear(norm_dir, i_dim) - end do - end if + flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) = & + flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) - & + vel_src_at_interface(i_dim)*current_tau_shear(norm_dir, i_dim) + end do + end if - if (bulk_stress) then - ! current_tau_bulk = 0.0_wp - call s_calculate_bulk_stress_tensor(Re_bulk, divergence_v, current_tau_bulk) + if (bulk_stress) then + ! current_tau_bulk = 0.0_wp + call s_calculate_bulk_stress_tensor(Re_bulk, divergence_v, current_tau_bulk) - do i_dim = 1, num_dims - flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) = & - flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) - current_tau_bulk(norm_dir, i_dim) + do i_dim = 1, num_dims + flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) = & + flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) - current_tau_bulk(norm_dir, i_dim) - flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) = & - flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) - & - vel_src_at_interface(i_dim)*current_tau_bulk(norm_dir, i_dim) - end do - end if + flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) = & + flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) - & + vel_src_at_interface(i_dim)*current_tau_bulk(norm_dir, i_dim) + end do + end if - end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_compute_cartesian_viscous_source_flux @@ -4912,155 +4912,155 @@ contains ! Reshaping Outputted Data in y-direction if (norm_dir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = is3%beg, is3%end - do j = is1%beg, is1%end - do k = is2%beg, is2%end - flux_vf(i)%sf(k, j, l) = & - flux_rsy_vf(j, k, l, i) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, sys_size + do l = is3%beg, is3%end + do j = is1%beg, is1%end + do k = is2%beg, is2%end + flux_vf(i)%sf(k, j, l) = & + flux_rsy_vf(j, k, l, i) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (cyl_coord) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = is3%beg, is3%end - do j = is1%beg, is1%end - do k = is2%beg, is2%end - flux_gsrc_vf(i)%sf(k, j, l) = & - flux_gsrc_rsy_vf(j, k, l, i) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, sys_size + do l = is3%beg, is3%end + do j = is1%beg, is1%end + do k = is2%beg, is2%end + flux_gsrc_vf(i)%sf(k, j, l) = & + flux_gsrc_rsy_vf(j, k, l, i) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3%beg, is3%end - do j = is1%beg, is1%end - do k = is2%beg, is2%end - flux_src_vf(advxb)%sf(k, j, l) = & - flux_src_rsy_vf(j, k, l, advxb) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = is3%beg, is3%end + do j = is1%beg, is1%end + do k = is2%beg, is2%end + flux_src_vf(advxb)%sf(k, j, l) = & + flux_src_rsy_vf(j, k, l, advxb) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1 .or. riemann_solver == 4) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = advxb + 1, advxe - do l = is3%beg, is3%end - do j = is1%beg, is1%end - do k = is2%beg, is2%end - flux_src_vf(i)%sf(k, j, l) = & - flux_src_rsy_vf(j, k, l, i) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = advxb + 1, advxe + do l = is3%beg, is3%end + do j = is1%beg, is1%end + do k = is2%beg, is2%end + flux_src_vf(i)%sf(k, j, l) = & + flux_src_rsy_vf(j, k, l, i) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if ! Reshaping Outputted Data in z-direction elseif (norm_dir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, sys_size + do j = is1%beg, is1%end + do k = is2%beg, is2%end + do l = is3%beg, is3%end - flux_vf(i)%sf(l, k, j) = & - flux_rsz_vf(j, k, l, i) - end do + flux_vf(i)%sf(l, k, j) = & + flux_rsz_vf(j, k, l, i) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (grid_geometry == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, sys_size + do j = is1%beg, is1%end + do k = is2%beg, is2%end + do l = is3%beg, is3%end - flux_gsrc_vf(i)%sf(l, k, j) = & - flux_gsrc_rsz_vf(j, k, l, i) - end do + flux_gsrc_vf(i)%sf(l, k, j) = & + flux_gsrc_rsz_vf(j, k, l, i) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if - #:call GPU_PARALLEL_LOOP(collapse=3) - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end - flux_src_vf(advxb)%sf(l, k, j) = & - flux_src_rsz_vf(j, k, l, advxb) - end do + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) + do j = is1%beg, is1%end + do k = is2%beg, is2%end + do l = is3%beg, is3%end + flux_src_vf(advxb)%sf(l, k, j) = & + flux_src_rsz_vf(j, k, l, advxb) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1 .or. riemann_solver == 4) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = advxb + 1, advxe - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end - flux_src_vf(i)%sf(l, k, j) = & - flux_src_rsz_vf(j, k, l, i) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - - end if - elseif (norm_dir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = is3%beg, is3%end + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = advxb + 1, advxe + do j = is1%beg, is1%end do k = is2%beg, is2%end - do j = is1%beg, is1%end - flux_vf(i)%sf(j, k, l) = & - flux_rsx_vf(j, k, l, i) + do l = is3%beg, is3%end + flux_src_vf(i)%sf(l, k, j) = & + flux_src_rsz_vf(j, k, l, i) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) + end if + elseif (norm_dir == 1) then + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end - flux_src_vf(advxb)%sf(j, k, l) = & - flux_src_rsx_vf(j, k, l, advxb) + flux_vf(i)%sf(j, k, l) = & + flux_rsx_vf(j, k, l, i) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + flux_src_vf(advxb)%sf(j, k, l) = & + flux_src_rsx_vf(j, k, l, advxb) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1 .or. riemann_solver == 4) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = advxb + 1, advxe - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - flux_src_vf(i)%sf(j, k, l) = & - flux_src_rsx_vf(j, k, l, i) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = advxb + 1, advxe + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + flux_src_vf(i)%sf(j, k, l) = & + flux_src_rsx_vf(j, k, l, i) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end if diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index 40977b9a18..a6499901a7 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -88,141 +88,141 @@ contains integer :: j, k, l, i if (id == 1) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') - do l = isz%beg, isz%end - do k = isy%beg, isy%end - do j = isx%beg, isx%end + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') + do l = isz%beg, isz%end + do k = isy%beg, isy%end + do j = isx%beg, isx%end - w1L = gL_x(j, k, l, 1) - w2L = gL_x(j, k, l, 2) - w3L = 0._wp - if (p > 0) w3L = gL_x(j, k, l, 3) + w1L = gL_x(j, k, l, 1) + w2L = gL_x(j, k, l, 2) + w3L = 0._wp + if (p > 0) w3L = gL_x(j, k, l, 3) - w1R = gR_x(j + 1, k, l, 1) - w2R = gR_x(j + 1, k, l, 2) - w3R = 0._wp - if (p > 0) w3R = gR_x(j + 1, k, l, 3) + w1R = gR_x(j + 1, k, l, 1) + w2R = gR_x(j + 1, k, l, 2) + w3R = 0._wp + if (p > 0) w3R = gR_x(j + 1, k, l, 3) - normWL = gL_x(j, k, l, num_dims + 1) - normWR = gR_x(j + 1, k, l, num_dims + 1) + normWL = gL_x(j, k, l, num_dims + 1) + normWR = gR_x(j + 1, k, l, num_dims + 1) - w1 = (w1L + w1R)/2._wp - w2 = (w2L + w2R)/2._wp - w3 = (w3L + w3R)/2._wp - normW = (normWL + normWR)/2._wp + w1 = (w1L + w1R)/2._wp + w2 = (w2L + w2R)/2._wp + w3 = (w3L + w3R)/2._wp + normW = (normWL + normWR)/2._wp - if (normW > capillary_cutoff) then - @:compute_capillary_stress_tensor() + if (normW > capillary_cutoff) then + @:compute_capillary_stress_tensor() - do i = 1, num_dims + do i = 1, num_dims - flux_src_vf(momxb + i - 1)%sf(j, k, l) = & - flux_src_vf(momxb + i - 1)%sf(j, k, l) + Omega(1, i) + flux_src_vf(momxb + i - 1)%sf(j, k, l) = & + flux_src_vf(momxb + i - 1)%sf(j, k, l) + Omega(1, i) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - Omega(1, i)*vSrc_rsx_vf(j, k, l, i) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + Omega(1, i)*vSrc_rsx_vf(j, k, l, i) - end do + end do - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsx_vf(j, k, l, 1) - end if - end do + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsx_vf(j, k, l, 1) + end if end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() elseif (id == 2) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') - do l = isz%beg, isz%end - do k = isy%beg, isy%end - do j = isx%beg, isx%end + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') + do l = isz%beg, isz%end + do k = isy%beg, isy%end + do j = isx%beg, isx%end - w1L = gL_y(k, j, l, 1) - w2L = gL_y(k, j, l, 2) - w3L = 0._wp - if (p > 0) w3L = gL_y(k, j, l, 3) + w1L = gL_y(k, j, l, 1) + w2L = gL_y(k, j, l, 2) + w3L = 0._wp + if (p > 0) w3L = gL_y(k, j, l, 3) - w1R = gR_y(k + 1, j, l, 1) - w2R = gR_y(k + 1, j, l, 2) - w3R = 0._wp - if (p > 0) w3R = gR_y(k + 1, j, l, 3) + w1R = gR_y(k + 1, j, l, 1) + w2R = gR_y(k + 1, j, l, 2) + w3R = 0._wp + if (p > 0) w3R = gR_y(k + 1, j, l, 3) - normWL = gL_y(k, j, l, num_dims + 1) - normWR = gR_y(k + 1, j, l, num_dims + 1) + normWL = gL_y(k, j, l, num_dims + 1) + normWR = gR_y(k + 1, j, l, num_dims + 1) - w1 = (w1L + w1R)/2._wp - w2 = (w2L + w2R)/2._wp - w3 = (w3L + w3R)/2._wp - normW = (normWL + normWR)/2._wp + w1 = (w1L + w1R)/2._wp + w2 = (w2L + w2R)/2._wp + w3 = (w3L + w3R)/2._wp + normW = (normWL + normWR)/2._wp - if (normW > capillary_cutoff) then - @:compute_capillary_stress_tensor() + if (normW > capillary_cutoff) then + @:compute_capillary_stress_tensor() - do i = 1, num_dims + do i = 1, num_dims - flux_src_vf(momxb + i - 1)%sf(j, k, l) = & - flux_src_vf(momxb + i - 1)%sf(j, k, l) + Omega(2, i) + flux_src_vf(momxb + i - 1)%sf(j, k, l) = & + flux_src_vf(momxb + i - 1)%sf(j, k, l) + Omega(2, i) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - Omega(2, i)*vSrc_rsy_vf(k, j, l, i) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + Omega(2, i)*vSrc_rsy_vf(k, j, l, i) - end do + end do - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsy_vf(k, j, l, 2) - end if - end do + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsy_vf(k, j, l, 2) + end if end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() elseif (id == 3) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') - do l = isz%beg, isz%end - do k = isy%beg, isy%end - do j = isx%beg, isx%end + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') + do l = isz%beg, isz%end + do k = isy%beg, isy%end + do j = isx%beg, isx%end - w1L = gL_z(l, k, j, 1) - w2L = gL_z(l, k, j, 2) - w3L = 0._wp - if (p > 0) w3L = gL_z(l, k, j, 3) + w1L = gL_z(l, k, j, 1) + w2L = gL_z(l, k, j, 2) + w3L = 0._wp + if (p > 0) w3L = gL_z(l, k, j, 3) - w1R = gR_z(l + 1, k, j, 1) - w2R = gR_z(l + 1, k, j, 2) - w3R = 0._wp - if (p > 0) w3R = gR_z(l + 1, k, j, 3) + w1R = gR_z(l + 1, k, j, 1) + w2R = gR_z(l + 1, k, j, 2) + w3R = 0._wp + if (p > 0) w3R = gR_z(l + 1, k, j, 3) - normWL = gL_z(l, k, j, num_dims + 1) - normWR = gR_z(l + 1, k, j, num_dims + 1) + normWL = gL_z(l, k, j, num_dims + 1) + normWR = gR_z(l + 1, k, j, num_dims + 1) - w1 = (w1L + w1R)/2._wp - w2 = (w2L + w2R)/2._wp - w3 = (w3L + w3R)/2._wp - normW = (normWL + normWR)/2._wp + w1 = (w1L + w1R)/2._wp + w2 = (w2L + w2R)/2._wp + w3 = (w3L + w3R)/2._wp + normW = (normWL + normWR)/2._wp - if (normW > capillary_cutoff) then - @:compute_capillary_stress_tensor() + if (normW > capillary_cutoff) then + @:compute_capillary_stress_tensor() - do i = 1, num_dims + do i = 1, num_dims - flux_src_vf(momxb + i - 1)%sf(j, k, l) = & - flux_src_vf(momxb + i - 1)%sf(j, k, l) + Omega(3, i) + flux_src_vf(momxb + i - 1)%sf(j, k, l) = & + flux_src_vf(momxb + i - 1)%sf(j, k, l) + Omega(3, i) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - Omega(3, i)*vSrc_rsz_vf(l, k, j, i) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + Omega(3, i)*vSrc_rsz_vf(l, k, j, i) - end do + end do - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsz_vf(l, k, j, 3) - end if - end do + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsz_vf(l, k, j, 3) + end if end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if @@ -243,58 +243,58 @@ contains isx%end = m; isy%end = n; isz%end = p ! compute gradient components - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - c_divs(1)%sf(j, k, l) = 1._wp/(x_cc(j + 1) - x_cc(j - 1))* & - (q_prim_vf(c_idx)%sf(j + 1, k, l) - q_prim_vf(c_idx)%sf(j - 1, k, l)) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + c_divs(1)%sf(j, k, l) = 1._wp/(x_cc(j + 1) - x_cc(j - 1))* & + (q_prim_vf(c_idx)%sf(j + 1, k, l) - q_prim_vf(c_idx)%sf(j - 1, k, l)) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + c_divs(2)%sf(j, k, l) = 1._wp/(y_cc(k + 1) - y_cc(k - 1))* & + (q_prim_vf(c_idx)%sf(j, k + 1, l) - q_prim_vf(c_idx)%sf(j, k - 1, l)) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) + if (p > 0) then + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m - c_divs(2)%sf(j, k, l) = 1._wp/(y_cc(k + 1) - y_cc(k - 1))* & - (q_prim_vf(c_idx)%sf(j, k + 1, l) - q_prim_vf(c_idx)%sf(j, k - 1, l)) + c_divs(3)%sf(j, k, l) = 1._wp/(z_cc(l + 1) - z_cc(l - 1))* & + (q_prim_vf(c_idx)%sf(j, k, l + 1) - q_prim_vf(c_idx)%sf(j, k, l - 1)) end do end do end do - #:endcall GPU_PARALLEL_LOOP - - if (p > 0) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - c_divs(3)%sf(j, k, l) = 1._wp/(z_cc(l + 1) - z_cc(l - 1))* & - (q_prim_vf(c_idx)%sf(j, k, l + 1) - q_prim_vf(c_idx)%sf(j, k, l - 1)) - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - c_divs(num_dims + 1)%sf(j, k, l) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - c_divs(num_dims + 1)%sf(j, k, l) = & - c_divs(num_dims + 1)%sf(j, k, l) + & - c_divs(i)%sf(j, k, l)**2._wp - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + c_divs(num_dims + 1)%sf(j, k, l) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims c_divs(num_dims + 1)%sf(j, k, l) = & - sqrt(c_divs(num_dims + 1)%sf(j, k, l)) + c_divs(num_dims + 1)%sf(j, k, l) + & + c_divs(i)%sf(j, k, l)**2._wp end do + c_divs(num_dims + 1)%sf(j, k, l) = & + sqrt(c_divs(num_dims + 1)%sf(j, k, l)) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() call s_populate_capillary_buffers(c_divs, bc_type) @@ -343,44 +343,44 @@ contains $:GPU_UPDATE(device='[is1,is2,is3,iv]') if (recon_dir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - vR_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_x(j, k, l, i) = v_vf(i)%sf(j, k, l) + vR_x(j, k, l, i) = v_vf(i)%sf(j, k, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() else if (recon_dir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - vR_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + vR_y(j, k, l, i) = v_vf(i)%sf(k, j, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() else if (recon_dir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - vR_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + vR_z(j, k, l, i) = v_vf(i)%sf(l, k, j) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end if #:endfor diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index d7ab8eb541..801e301a9d 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -507,52 +507,52 @@ contains end if if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=s) - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - if (s == 1 .and. nstage > 1) then - q_cons_ts(stor)%vf(i)%sf(j, k, l) = & - q_cons_ts(1)%vf(i)%sf(j, k, l) - end if - q_cons_ts(1)%vf(i)%sf(j, k, l) = & - (rk_coef(s, 1)*q_cons_ts(1)%vf(i)%sf(j, k, l) & - + rk_coef(s, 2)*q_cons_ts(stor)%vf(i)%sf(j, k, l) & - + rk_coef(s, 3)*dt*rhs_vf(i)%sf(j, k, l))/rk_coef(s, 4) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + if (s == 1 .and. nstage > 1) then + q_cons_ts(stor)%vf(i)%sf(j, k, l) = & + q_cons_ts(1)%vf(i)%sf(j, k, l) + end if + q_cons_ts(1)%vf(i)%sf(j, k, l) = & + (rk_coef(s, 1)*q_cons_ts(1)%vf(i)%sf(j, k, l) & + + rk_coef(s, 2)*q_cons_ts(stor)%vf(i)%sf(j, k, l) & + + rk_coef(s, 3)*dt*rhs_vf(i)%sf(j, k, l))/rk_coef(s, 4) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() !Evolve pb and mv for non-polytropic qbmm if (qbmm .and. (.not. polytropic)) then - #:call GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - if (s == 1 .and. nstage > 1) then - pb_ts(stor)%sf(j, k, l, q, i) = & - pb_ts(1)%sf(j, k, l, q, i) - mv_ts(stor)%sf(j, k, l, q, i) = & - mv_ts(1)%sf(j, k, l, q, i) - end if - pb_ts(1)%sf(j, k, l, q, i) = & - (rk_coef(s, 1)*pb_ts(1)%sf(j, k, l, q, i) & - + rk_coef(s, 2)*pb_ts(stor)%sf(j, k, l, q, i) & - + rk_coef(s, 3)*dt*rhs_pb(j, k, l, q, i))/rk_coef(s, 4) - mv_ts(1)%sf(j, k, l, q, i) = & - (rk_coef(s, 1)*mv_ts(1)%sf(j, k, l, q, i) & - + rk_coef(s, 2)*mv_ts(stor)%sf(j, k, l, q, i) & - + rk_coef(s, 3)*dt*rhs_mv(j, k, l, q, i))/rk_coef(s, 4) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l,q]', collapse=5) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + if (s == 1 .and. nstage > 1) then + pb_ts(stor)%sf(j, k, l, q, i) = & + pb_ts(1)%sf(j, k, l, q, i) + mv_ts(stor)%sf(j, k, l, q, i) = & + mv_ts(1)%sf(j, k, l, q, i) + end if + pb_ts(1)%sf(j, k, l, q, i) = & + (rk_coef(s, 1)*pb_ts(1)%sf(j, k, l, q, i) & + + rk_coef(s, 2)*pb_ts(stor)%sf(j, k, l, q, i) & + + rk_coef(s, 3)*dt*rhs_pb(j, k, l, q, i))/rk_coef(s, 4) + mv_ts(1)%sf(j, k, l, q, i) = & + (rk_coef(s, 1)*mv_ts(1)%sf(j, k, l, q, i) & + + rk_coef(s, 2)*mv_ts(stor)%sf(j, k, l, q, i) & + + rk_coef(s, 3)*dt*rhs_mv(j, k, l, q, i))/rk_coef(s, 4) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, rk_coef(s, 3)*dt/rk_coef(s, 4)) @@ -682,24 +682,24 @@ contains idwint) end if - #:call GPU_PARALLEL_LOOP(collapse=3, private='[vel, alpha, Re]') - do l = 0, p - do k = 0, n - do j = 0, m - if (igr) then - call s_compute_enthalpy(q_cons_ts(1)%vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) - else - call s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) - end if + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,vel, alpha, Re]') + do l = 0, p + do k = 0, n + do j = 0, m + if (igr) then + call s_compute_enthalpy(q_cons_ts(1)%vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) + else + call s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) + end if - ! Compute mixture sound speed - call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c) + ! Compute mixture sound speed + call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c) - call s_compute_dt_from_cfl(vel, c, max_dt, rho, Re, j, k, l) - end do + call s_compute_dt_from_cfl(vel, c, max_dt, rho, Re, j, k, l) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() #:call GPU_PARALLEL(copyout='[dt_local]', copyin='[max_dt]') dt_local = minval(max_dt) @@ -730,18 +730,18 @@ contains call nvtxStartRange("RHS-BODYFORCES") call s_compute_body_forces_rhs(q_prim_vf_in, q_cons_vf, rhs_vf_in) - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = momxb, E_idx - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_vf(i)%sf(j, k, l) = q_cons_vf(i)%sf(j, k, l) + & - ldt*rhs_vf_in(i)%sf(j, k, l) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = momxb, E_idx + do l = 0, p + do k = 0, n + do j = 0, m + q_cons_vf(i)%sf(j, k, l) = q_cons_vf(i)%sf(j, k, l) + & + ldt*rhs_vf_in(i)%sf(j, k, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() call nvtxEndRange @@ -757,68 +757,68 @@ contains integer :: i, j, k, l !< Generic loop iterator if (t_step == t_step_start) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_prim_ts(3)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_prim_ts(3)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() elseif (t_step == t_step_start + 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_prim_ts(2)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_prim_ts(2)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() elseif (t_step == t_step_start + 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_prim_ts(1)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_prim_ts(1)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() elseif (t_step == t_step_start + 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_prim_ts(0)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_prim_ts(0)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() else ! All other timesteps - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_prim_ts(3)%vf(i)%sf(j, k, l) = q_prim_ts(2)%vf(i)%sf(j, k, l) - q_prim_ts(2)%vf(i)%sf(j, k, l) = q_prim_ts(1)%vf(i)%sf(j, k, l) - q_prim_ts(1)%vf(i)%sf(j, k, l) = q_prim_ts(0)%vf(i)%sf(j, k, l) - q_prim_ts(0)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_prim_ts(3)%vf(i)%sf(j, k, l) = q_prim_ts(2)%vf(i)%sf(j, k, l) + q_prim_ts(2)%vf(i)%sf(j, k, l) = q_prim_ts(1)%vf(i)%sf(j, k, l) + q_prim_ts(1)%vf(i)%sf(j, k, l) = q_prim_ts(0)%vf(i)%sf(j, k, l) + q_prim_ts(0)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_time_step_cycling diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index f6d8cb6b2d..13a4e4de0a 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -80,440 +80,440 @@ contains $:GPU_UPDATE(device='[is1_viscous,is2_viscous,is3_viscous]') - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - tau_Re_vf(i)%sf(j, k, l) = 0._wp - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + tau_Re_vf(i)%sf(j, k, l) = 0._wp end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (shear_stress) then ! Shear stresses - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') - do l = is3_viscous%beg, is3_viscous%end - do k = -1, 1 - do j = is1_viscous%beg, is1_viscous%end - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) - if (bubbles_euler .and. num_fluids == 1) then - alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) - else - alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) - end if - end do - - if (bubbles_euler) then - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') + do l = is3_viscous%beg, is3_viscous%end + do k = -1, 1 + do j = is1_viscous%beg, is1_viscous%end - if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else if ((model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else - rho_visc = alpha_rho_visc(1) - gamma_visc = gammas(1) - pi_inf_visc = pi_infs(1) - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) + if (bubbles_euler .and. num_fluids == 1) then + alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) else - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp - - alpha_visc_sum = 0._wp - - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) - alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) - alpha_visc_sum = alpha_visc_sum + alpha_visc(i) - end do - - alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) + alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) + end if + end do - end if + if (bubbles_euler) then + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp + if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do - - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_visc(i) = dflt_real - - if (Re_size(i) > 0) Re_visc(i) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & - + Re_visc(i) - end do - - Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) - - end do - end if + else if ((model_eqns == 2) .and. (num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + else + rho_visc = alpha_rho_visc(1) + gamma_visc = gammas(1) + pi_inf_visc = pi_infs(1) end if + else + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp - tau_Re(2, 1) = (grad_y_vf(1)%sf(j, k, l) + & - grad_x_vf(2)%sf(j, k, l))/ & - Re_visc(1) + alpha_visc_sum = 0._wp - tau_Re(2, 2) = (4._wp*grad_y_vf(2)%sf(j, k, l) & - - 2._wp*grad_x_vf(1)%sf(j, k, l) & - - 2._wp*q_prim_vf(momxb + 1)%sf(j, k, l)/y_cc(k))/ & - (3._wp*Re_visc(1)) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - tau_Re_vf(contxe + i)%sf(j, k, l) = & - tau_Re_vf(contxe + i)%sf(j, k, l) - & - tau_Re(2, i) - - tau_Re_vf(E_idx)%sf(j, k, l) = & - tau_Re_vf(E_idx)%sf(j, k, l) - & - q_prim_vf(contxe + i)%sf(j, k, l)*tau_Re(2, i) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - end if + if (mpp_lim) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) + alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) + alpha_visc_sum = alpha_visc_sum + alpha_visc(i) + end do - if (bulk_stress) then ! Bulk stresses - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') - do l = is3_viscous%beg, is3_viscous%end - do k = -1, 1 - do j = is1_viscous%beg, is1_viscous%end + alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) + + end if $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) - if (bubbles_euler .and. num_fluids == 1) then - alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) - else - alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) - end if + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do - if (bubbles_euler) then - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_visc(i) = dflt_real - if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else if ((model_eqns == 2) .and. (num_fluids > 2)) then + if (Re_size(i) > 0) Re_visc(i) = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + do q = 1, Re_size(i) + Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + + Re_visc(i) end do - else - rho_visc = alpha_rho_visc(1) - gamma_visc = gammas(1) - pi_inf_visc = pi_infs(1) - end if - else - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp - alpha_visc_sum = 0._wp + Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) - alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) - alpha_visc_sum = alpha_visc_sum + alpha_visc(i) - end do + end do + end if + end if - alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) + tau_Re(2, 1) = (grad_y_vf(1)%sf(j, k, l) + & + grad_x_vf(2)%sf(j, k, l))/ & + Re_visc(1) + + tau_Re(2, 2) = (4._wp*grad_y_vf(2)%sf(j, k, l) & + - 2._wp*grad_x_vf(1)%sf(j, k, l) & + - 2._wp*q_prim_vf(momxb + 1)%sf(j, k, l)/y_cc(k))/ & + (3._wp*Re_visc(1)) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + tau_Re_vf(contxe + i)%sf(j, k, l) = & + tau_Re_vf(contxe + i)%sf(j, k, l) - & + tau_Re(2, i) - end if + tau_Re_vf(E_idx)%sf(j, k, l) = & + tau_Re_vf(E_idx)%sf(j, k, l) - & + q_prim_vf(contxe + i)%sf(j, k, l)*tau_Re(2, i) + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + + if (bulk_stress) then ! Bulk stresses + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') + do l = is3_viscous%beg, is3_viscous%end + do k = -1, 1 + do j = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) + if (bubbles_euler .and. num_fluids == 1) then + alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) + else + alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) + end if + end do + + if (bubbles_euler) then + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp + + if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do + else if ((model_eqns == 2) .and. (num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + else + rho_visc = alpha_rho_visc(1) + gamma_visc = gammas(1) + pi_inf_visc = pi_infs(1) + end if + else + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_visc(i) = dflt_real - - if (Re_size(i) > 0) Re_visc(i) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & - + Re_visc(i) - end do + alpha_visc_sum = 0._wp + + if (mpp_lim) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) + alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) + alpha_visc_sum = alpha_visc_sum + alpha_visc(i) + end do + + alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) - Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) + end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_visc(i) = dflt_real + + if (Re_size(i) > 0) Re_visc(i) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + + Re_visc(i) end do - end if + + Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) + + end do end if + end if - tau_Re(2, 2) = (grad_x_vf(1)%sf(j, k, l) + & - grad_y_vf(2)%sf(j, k, l) + & - q_prim_vf(momxb + 1)%sf(j, k, l)/y_cc(k))/ & - Re_visc(2) + tau_Re(2, 2) = (grad_x_vf(1)%sf(j, k, l) + & + grad_y_vf(2)%sf(j, k, l) + & + q_prim_vf(momxb + 1)%sf(j, k, l)/y_cc(k))/ & + Re_visc(2) - tau_Re_vf(momxb + 1)%sf(j, k, l) = & - tau_Re_vf(momxb + 1)%sf(j, k, l) - & - tau_Re(2, 2) + tau_Re_vf(momxb + 1)%sf(j, k, l) = & + tau_Re_vf(momxb + 1)%sf(j, k, l) - & + tau_Re(2, 2) - tau_Re_vf(E_idx)%sf(j, k, l) = & - tau_Re_vf(E_idx)%sf(j, k, l) - & - q_prim_vf(momxb + 1)%sf(j, k, l)*tau_Re(2, 2) + tau_Re_vf(E_idx)%sf(j, k, l) = & + tau_Re_vf(E_idx)%sf(j, k, l) - & + q_prim_vf(momxb + 1)%sf(j, k, l)*tau_Re(2, 2) - end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (p == 0) return if (shear_stress) then ! Shear stresses - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') - do l = is3_viscous%beg, is3_viscous%end - do k = -1, 1 - do j = is1_viscous%beg, is1_viscous%end - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) - if (bubbles_euler .and. num_fluids == 1) then - alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) - else - alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) - end if - end do - - if (bubbles_euler) then - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') + do l = is3_viscous%beg, is3_viscous%end + do k = -1, 1 + do j = is1_viscous%beg, is1_viscous%end - if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else if ((model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else - rho_visc = alpha_rho_visc(1) - gamma_visc = gammas(1) - pi_inf_visc = pi_infs(1) - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) + if (bubbles_euler .and. num_fluids == 1) then + alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) else - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp - - alpha_visc_sum = 0._wp - - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) - alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) - alpha_visc_sum = alpha_visc_sum + alpha_visc(i) - end do - - alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) + alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) + end if + end do - end if + if (bubbles_euler) then + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp + if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do + else if ((model_eqns == 2) .and. (num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + else + rho_visc = alpha_rho_visc(1) + gamma_visc = gammas(1) + pi_inf_visc = pi_infs(1) + end if + else + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_visc(i) = dflt_real - - if (Re_size(i) > 0) Re_visc(i) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & - + Re_visc(i) - end do + alpha_visc_sum = 0._wp + + if (mpp_lim) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) + alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) + alpha_visc_sum = alpha_visc_sum + alpha_visc(i) + end do + + alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) + + end if - Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_visc(i) = dflt_real + if (Re_size(i) > 0) Re_visc(i) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + + Re_visc(i) end do - end if + + Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) + + end do end if + end if - tau_Re(2, 2) = -(2._wp/3._wp)*grad_z_vf(3)%sf(j, k, l)/y_cc(k)/ & - Re_visc(1) + tau_Re(2, 2) = -(2._wp/3._wp)*grad_z_vf(3)%sf(j, k, l)/y_cc(k)/ & + Re_visc(1) - tau_Re(2, 3) = ((grad_z_vf(2)%sf(j, k, l) - & - q_prim_vf(momxe)%sf(j, k, l))/ & - y_cc(k) + grad_y_vf(3)%sf(j, k, l))/ & - Re_visc(1) + tau_Re(2, 3) = ((grad_z_vf(2)%sf(j, k, l) - & + q_prim_vf(momxe)%sf(j, k, l))/ & + y_cc(k) + grad_y_vf(3)%sf(j, k, l))/ & + Re_visc(1) - $:GPU_LOOP(parallelism='[seq]') - do i = 2, 3 - tau_Re_vf(contxe + i)%sf(j, k, l) = & - tau_Re_vf(contxe + i)%sf(j, k, l) - & - tau_Re(2, i) - - tau_Re_vf(E_idx)%sf(j, k, l) = & - tau_Re_vf(E_idx)%sf(j, k, l) - & - q_prim_vf(contxe + i)%sf(j, k, l)*tau_Re(2, i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 2, 3 + tau_Re_vf(contxe + i)%sf(j, k, l) = & + tau_Re_vf(contxe + i)%sf(j, k, l) - & + tau_Re(2, i) + tau_Re_vf(E_idx)%sf(j, k, l) = & + tau_Re_vf(E_idx)%sf(j, k, l) - & + q_prim_vf(contxe + i)%sf(j, k, l)*tau_Re(2, i) end do + end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (bulk_stress) then ! Bulk stresses - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') - do l = is3_viscous%beg, is3_viscous%end - do k = -1, 1 - do j = is1_viscous%beg, is1_viscous%end - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) - if (bubbles_euler .and. num_fluids == 1) then - alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) - else - alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) - end if - end do - - if (bubbles_euler) then - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') + do l = is3_viscous%beg, is3_viscous%end + do k = -1, 1 + do j = is1_viscous%beg, is1_viscous%end - if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else if ((model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else - rho_visc = alpha_rho_visc(1) - gamma_visc = gammas(1) - pi_inf_visc = pi_infs(1) - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) + if (bubbles_euler .and. num_fluids == 1) then + alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) else - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp - - alpha_visc_sum = 0._wp - - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) - alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) - alpha_visc_sum = alpha_visc_sum + alpha_visc(i) - end do - - alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) + alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) + end if + end do - end if + if (bubbles_euler) then + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp + if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do + else if ((model_eqns == 2) .and. (num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + else + rho_visc = alpha_rho_visc(1) + gamma_visc = gammas(1) + pi_inf_visc = pi_infs(1) + end if + else + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_visc(i) = dflt_real - - if (Re_size(i) > 0) Re_visc(i) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & - + Re_visc(i) - end do + alpha_visc_sum = 0._wp + + if (mpp_lim) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) + alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) + alpha_visc_sum = alpha_visc_sum + alpha_visc(i) + end do + + alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) - Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) + end if + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_visc(i) = dflt_real + if (Re_size(i) > 0) Re_visc(i) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + + Re_visc(i) end do - end if + + Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) + + end do end if + end if - tau_Re(2, 2) = grad_z_vf(3)%sf(j, k, l)/y_cc(k)/ & - Re_visc(2) + tau_Re(2, 2) = grad_z_vf(3)%sf(j, k, l)/y_cc(k)/ & + Re_visc(2) - tau_Re_vf(momxb + 1)%sf(j, k, l) = & - tau_Re_vf(momxb + 1)%sf(j, k, l) - & - tau_Re(2, 2) + tau_Re_vf(momxb + 1)%sf(j, k, l) = & + tau_Re_vf(momxb + 1)%sf(j, k, l) - & + tau_Re(2, 2) - tau_Re_vf(E_idx)%sf(j, k, l) = & - tau_Re_vf(E_idx)%sf(j, k, l) - & - q_prim_vf(momxb + 1)%sf(j, k, l)*tau_Re(2, 2) + tau_Re_vf(E_idx)%sf(j, k, l) = & + tau_Re_vf(E_idx)%sf(j, k, l) - & + q_prim_vf(momxb + 1)%sf(j, k, l)*tau_Re(2, 2) - end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_compute_viscous_stress_tensor @@ -598,363 +598,363 @@ contains $:GPU_UPDATE(device='[is1_viscous,is2_viscous,is3_viscous]') - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = iy%beg, iy%end - do j = is1_viscous%beg + 1, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqL_prim_dx_n(1)%vf(i)%sf(j, k, l) = & - (q_prim_qp%vf(i)%sf(j, k, l) - & - q_prim_qp%vf(i)%sf(j - 1, k, l))/ & - (x_cc(j) - x_cc(j - 1)) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do l = is3_viscous%beg, is3_viscous%end + do k = iy%beg, iy%end + do j = is1_viscous%beg + 1, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqL_prim_dx_n(1)%vf(i)%sf(j, k, l) = & + (q_prim_qp%vf(i)%sf(j, k, l) - & + q_prim_qp%vf(i)%sf(j - 1, k, l))/ & + (x_cc(j) - x_cc(j - 1)) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqR_prim_dx_n(1)%vf(i)%sf(j, k, l) = & - (q_prim_qp%vf(i)%sf(j + 1, k, l) - & - q_prim_qp%vf(i)%sf(j, k, l))/ & - (x_cc(j + 1) - x_cc(j)) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqR_prim_dx_n(1)%vf(i)%sf(j, k, l) = & + (q_prim_qp%vf(i)%sf(j + 1, k, l) - & + q_prim_qp%vf(i)%sf(j, k, l))/ & + (x_cc(j + 1) - x_cc(j)) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (n > 0) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do j = is2_viscous%beg + 1, is2_viscous%end - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqL_prim_dy_n(2)%vf(i)%sf(k, j, l) = & - (q_prim_qp%vf(i)%sf(k, j, l) - & - q_prim_qp%vf(i)%sf(k, j - 1, l))/ & - (y_cc(j) - y_cc(j - 1)) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do l = is3_viscous%beg, is3_viscous%end + do j = is2_viscous%beg + 1, is2_viscous%end + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqL_prim_dy_n(2)%vf(i)%sf(k, j, l) = & + (q_prim_qp%vf(i)%sf(k, j, l) - & + q_prim_qp%vf(i)%sf(k, j - 1, l))/ & + (y_cc(j) - y_cc(j - 1)) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do j = is2_viscous%beg, is2_viscous%end - 1 - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqR_prim_dy_n(2)%vf(i)%sf(k, j, l) = & - (q_prim_qp%vf(i)%sf(k, j + 1, l) - & - q_prim_qp%vf(i)%sf(k, j, l))/ & - (y_cc(j + 1) - y_cc(j)) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do l = is3_viscous%beg, is3_viscous%end + do j = is2_viscous%beg, is2_viscous%end - 1 + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqR_prim_dy_n(2)%vf(i)%sf(k, j, l) = & + (q_prim_qp%vf(i)%sf(k, j + 1, l) - & + q_prim_qp%vf(i)%sf(k, j, l))/ & + (y_cc(j + 1) - y_cc(j)) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do j = is2_viscous%beg + 1, is2_viscous%end - do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = & - (dqL_prim_dx_n(1)%vf(i)%sf(k, j, l) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, j, l) + & - dqL_prim_dx_n(1)%vf(i)%sf(k, j - 1, l) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, j - 1, l)) - - dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & - dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do l = is3_viscous%beg, is3_viscous%end + do j = is2_viscous%beg + 1, is2_viscous%end + do k = is1_viscous%beg + 1, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = & + (dqL_prim_dx_n(1)%vf(i)%sf(k, j, l) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, j, l) + & + dqL_prim_dx_n(1)%vf(i)%sf(k, j - 1, l) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, j - 1, l)) + + dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & + dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do j = is2_viscous%beg, is2_viscous%end - 1 - do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = & - (dqL_prim_dx_n(1)%vf(i)%sf(k, j + 1, l) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, j + 1, l) + & - dqL_prim_dx_n(1)%vf(i)%sf(k, j, l) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, j, l)) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do l = is3_viscous%beg, is3_viscous%end + do j = is2_viscous%beg, is2_viscous%end - 1 + do k = is1_viscous%beg + 1, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = & + (dqL_prim_dx_n(1)%vf(i)%sf(k, j + 1, l) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, j + 1, l) + & + dqL_prim_dx_n(1)%vf(i)%sf(k, j, l) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, j, l)) - dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & - dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) + dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & + dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) - end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg + 1, is2_viscous%end - 1 - do j = is1_viscous%beg + 1, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = & - (dqL_prim_dy_n(2)%vf(i)%sf(j, k, l) + & - dqR_prim_dy_n(2)%vf(i)%sf(j, k, l) + & - dqL_prim_dy_n(2)%vf(i)%sf(j - 1, k, l) + & - dqR_prim_dy_n(2)%vf(i)%sf(j - 1, k, l)) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg + 1, is2_viscous%end - 1 + do j = is1_viscous%beg + 1, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = & + (dqL_prim_dy_n(2)%vf(i)%sf(j, k, l) + & + dqR_prim_dy_n(2)%vf(i)%sf(j, k, l) + & + dqL_prim_dy_n(2)%vf(i)%sf(j - 1, k, l) + & + dqR_prim_dy_n(2)%vf(i)%sf(j - 1, k, l)) - dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & - dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) + dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & + dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) - end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg + 1, is2_viscous%end - 1 - do j = is1_viscous%beg, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = & - (dqL_prim_dy_n(2)%vf(i)%sf(j + 1, k, l) + & - dqR_prim_dy_n(2)%vf(i)%sf(j + 1, k, l) + & - dqL_prim_dy_n(2)%vf(i)%sf(j, k, l) + & - dqR_prim_dy_n(2)%vf(i)%sf(j, k, l)) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg + 1, is2_viscous%end - 1 + do j = is1_viscous%beg, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = & + (dqL_prim_dy_n(2)%vf(i)%sf(j + 1, k, l) + & + dqR_prim_dy_n(2)%vf(i)%sf(j + 1, k, l) + & + dqL_prim_dy_n(2)%vf(i)%sf(j, k, l) + & + dqR_prim_dy_n(2)%vf(i)%sf(j, k, l)) - dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & - dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) + dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & + dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) - end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (p > 0) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do j = is3_viscous%beg + 1, is3_viscous%end - do l = is2_viscous%beg, is2_viscous%end - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do j = is3_viscous%beg + 1, is3_viscous%end + do l = is2_viscous%beg, is2_viscous%end + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqL_prim_dz_n(3)%vf(i)%sf(k, l, j) = & - (q_prim_qp%vf(i)%sf(k, l, j) - & - q_prim_qp%vf(i)%sf(k, l, j - 1))/ & - (z_cc(j) - z_cc(j - 1)) - end do + dqL_prim_dz_n(3)%vf(i)%sf(k, l, j) = & + (q_prim_qp%vf(i)%sf(k, l, j) - & + q_prim_qp%vf(i)%sf(k, l, j - 1))/ & + (z_cc(j) - z_cc(j - 1)) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) - do j = is3_viscous%beg, is3_viscous%end - 1 - do l = is2_viscous%beg, is2_viscous%end - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do j = is3_viscous%beg, is3_viscous%end - 1 + do l = is2_viscous%beg, is2_viscous%end + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqR_prim_dz_n(3)%vf(i)%sf(k, l, j) = & - (q_prim_qp%vf(i)%sf(k, l, j + 1) - & - q_prim_qp%vf(i)%sf(k, l, j))/ & - (z_cc(j + 1) - z_cc(j)) - end do + dqR_prim_dz_n(3)%vf(i)%sf(k, l, j) = & + (q_prim_qp%vf(i)%sf(k, l, j + 1) - & + q_prim_qp%vf(i)%sf(k, l, j))/ & + (z_cc(j + 1) - z_cc(j)) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg + 1, is3_viscous%end - 1 - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg + 1, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do l = is3_viscous%beg + 1, is3_viscous%end - 1 + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg + 1, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) = & - (dqL_prim_dz_n(3)%vf(i)%sf(j, k, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(j, k, l) + & - dqL_prim_dz_n(3)%vf(i)%sf(j - 1, k, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(j - 1, k, l)) + dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) = & + (dqL_prim_dz_n(3)%vf(i)%sf(j, k, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(j, k, l) + & + dqL_prim_dz_n(3)%vf(i)%sf(j - 1, k, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(j - 1, k, l)) - dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & - dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) + dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & + dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) - end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg + 1, is3_viscous%end - 1 - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do l = is3_viscous%beg + 1, is3_viscous%end - 1 + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) = & - (dqL_prim_dz_n(3)%vf(i)%sf(j + 1, k, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(j + 1, k, l) + & - dqL_prim_dz_n(3)%vf(i)%sf(j, k, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(j, k, l)) + dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) = & + (dqL_prim_dz_n(3)%vf(i)%sf(j + 1, k, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(j + 1, k, l) + & + dqL_prim_dz_n(3)%vf(i)%sf(j, k, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(j, k, l)) - dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & - dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) + dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & + dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) - end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg + 1, is3_viscous%end - 1 - do j = is2_viscous%beg + 1, is2_viscous%end - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do l = is3_viscous%beg + 1, is3_viscous%end - 1 + do j = is2_viscous%beg + 1, is2_viscous%end + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) = & - (dqL_prim_dz_n(3)%vf(i)%sf(k, j, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(k, j, l) + & - dqL_prim_dz_n(3)%vf(i)%sf(k, j - 1, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(k, j - 1, l)) + dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) = & + (dqL_prim_dz_n(3)%vf(i)%sf(k, j, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(k, j, l) + & + dqL_prim_dz_n(3)%vf(i)%sf(k, j - 1, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(k, j - 1, l)) - dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & - dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) + dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & + dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) - end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg + 1, is3_viscous%end - 1 - do j = is2_viscous%beg, is2_viscous%end - 1 - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do l = is3_viscous%beg + 1, is3_viscous%end - 1 + do j = is2_viscous%beg, is2_viscous%end - 1 + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) = & - (dqL_prim_dz_n(3)%vf(i)%sf(k, j + 1, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(k, j + 1, l) + & - dqL_prim_dz_n(3)%vf(i)%sf(k, j, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(k, j, l)) + dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) = & + (dqL_prim_dz_n(3)%vf(i)%sf(k, j + 1, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(k, j + 1, l) + & + dqL_prim_dz_n(3)%vf(i)%sf(k, j, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(k, j, l)) - dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & - dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) + dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & + dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) - end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) - do j = is3_viscous%beg + 1, is3_viscous%end - do l = is2_viscous%beg + 1, is2_viscous%end - 1 - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do j = is3_viscous%beg + 1, is3_viscous%end + do l = is2_viscous%beg + 1, is2_viscous%end - 1 + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) = & - (dqL_prim_dy_n(2)%vf(i)%sf(k, l, j) + & - dqR_prim_dy_n(2)%vf(i)%sf(k, l, j) + & - dqL_prim_dy_n(2)%vf(i)%sf(k, l, j - 1) + & - dqR_prim_dy_n(2)%vf(i)%sf(k, l, j - 1)) + dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) = & + (dqL_prim_dy_n(2)%vf(i)%sf(k, l, j) + & + dqR_prim_dy_n(2)%vf(i)%sf(k, l, j) + & + dqL_prim_dy_n(2)%vf(i)%sf(k, l, j - 1) + & + dqR_prim_dy_n(2)%vf(i)%sf(k, l, j - 1)) - dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & - dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) + dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & + dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) - end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP(collapse=3) - do j = is3_viscous%beg, is3_viscous%end - 1 - do l = is2_viscous%beg + 1, is2_viscous%end - 1 - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do j = is3_viscous%beg, is3_viscous%end - 1 + do l = is2_viscous%beg + 1, is2_viscous%end - 1 + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) = & - (dqL_prim_dy_n(2)%vf(i)%sf(k, l, j + 1) + & - dqR_prim_dy_n(2)%vf(i)%sf(k, l, j + 1) + & - dqL_prim_dy_n(2)%vf(i)%sf(k, l, j) + & - dqR_prim_dy_n(2)%vf(i)%sf(k, l, j)) + dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) = & + (dqL_prim_dy_n(2)%vf(i)%sf(k, l, j + 1) + & + dqR_prim_dy_n(2)%vf(i)%sf(k, l, j + 1) + & + dqL_prim_dy_n(2)%vf(i)%sf(k, l, j) + & + dqR_prim_dy_n(2)%vf(i)%sf(k, l, j)) - dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & - dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) + dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & + dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) - end do end do end do end do - #:endcall GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) - do j = is3_viscous%beg + 1, is3_viscous%end - do l = is2_viscous%beg, is2_viscous%end - do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + end do + $:END_GPU_PARALLEL_LOOP() + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do j = is3_viscous%beg + 1, is3_viscous%end + do l = is2_viscous%beg, is2_viscous%end + do k = is1_viscous%beg + 1, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) = & - (dqL_prim_dx_n(1)%vf(i)%sf(k, l, j) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, l, j) + & - dqL_prim_dx_n(1)%vf(i)%sf(k, l, j - 1) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, l, j - 1)) + dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) = & + (dqL_prim_dx_n(1)%vf(i)%sf(k, l, j) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, l, j) + & + dqL_prim_dx_n(1)%vf(i)%sf(k, l, j - 1) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, l, j - 1)) - dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & - dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) + dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & + dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) - end do end do end do end do - #:endcall GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) - do j = is3_viscous%beg, is3_viscous%end - 1 - do l = is2_viscous%beg, is2_viscous%end - do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) = & - (dqL_prim_dx_n(1)%vf(i)%sf(k, l, j + 1) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, l, j + 1) + & - dqL_prim_dx_n(1)%vf(i)%sf(k, l, j) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, l, j)) + end do + $:END_GPU_PARALLEL_LOOP() + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do j = is3_viscous%beg, is3_viscous%end - 1 + do l = is2_viscous%beg, is2_viscous%end + do k = is1_viscous%beg + 1, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) = & + (dqL_prim_dx_n(1)%vf(i)%sf(k, l, j + 1) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, l, j + 1) + & + dqL_prim_dx_n(1)%vf(i)%sf(k, l, j) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, l, j)) - dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & - dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) + dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & + dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) - end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() do i = iv%beg, iv%end call s_compute_fd_gradient(q_prim_qp%vf(i), & @@ -1047,44 +1047,44 @@ contains if (viscous) then if (weno_Re_flux) then if (norm_dir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3_viscous%beg, is3_viscous%end - do j = is1_viscous%beg, is1_viscous%end - do k = is2_viscous%beg, is2_viscous%end - vL_prim_vf(i)%sf(k, j, l) = vL_y(j, k, l, i) - vR_prim_vf(i)%sf(k, j, l) = vR_y(j, k, l, i) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = iv%beg, iv%end + do l = is3_viscous%beg, is3_viscous%end + do j = is1_viscous%beg, is1_viscous%end + do k = is2_viscous%beg, is2_viscous%end + vL_prim_vf(i)%sf(k, j, l) = vL_y(j, k, l, i) + vR_prim_vf(i)%sf(k, j, l) = vR_y(j, k, l, i) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() elseif (norm_dir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do j = is1_viscous%beg, is1_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do l = is3_viscous%beg, is3_viscous%end - vL_prim_vf(i)%sf(l, k, j) = vL_z(j, k, l, i) - vR_prim_vf(i)%sf(l, k, j) = vR_z(j, k, l, i) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = iv%beg, iv%end + do j = is1_viscous%beg, is1_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do l = is3_viscous%beg, is3_viscous%end + vL_prim_vf(i)%sf(l, k, j) = vL_z(j, k, l, i) + vR_prim_vf(i)%sf(l, k, j) = vR_z(j, k, l, i) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() elseif (norm_dir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - vL_prim_vf(i)%sf(j, k, l) = vL_x(j, k, l, i) - vR_prim_vf(i)%sf(j, k, l) = vR_x(j, k, l, i) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = iv%beg, iv%end + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + vL_prim_vf(i)%sf(j, k, l) = vL_x(j, k, l, i) + vR_prim_vf(i)%sf(j, k, l) = vR_x(j, k, l, i) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end if end if @@ -1151,44 +1151,44 @@ contains if (viscous) then if (weno_Re_flux) then if (norm_dir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3_viscous%beg, is3_viscous%end - do j = is1_viscous%beg, is1_viscous%end - do k = is2_viscous%beg, is2_viscous%end - vL_prim_vf(i)%sf(k, j, l) = vL_y(j, k, l, i) - vR_prim_vf(i)%sf(k, j, l) = vR_y(j, k, l, i) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = iv%beg, iv%end + do l = is3_viscous%beg, is3_viscous%end + do j = is1_viscous%beg, is1_viscous%end + do k = is2_viscous%beg, is2_viscous%end + vL_prim_vf(i)%sf(k, j, l) = vL_y(j, k, l, i) + vR_prim_vf(i)%sf(k, j, l) = vR_y(j, k, l, i) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() elseif (norm_dir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do j = is1_viscous%beg, is1_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do l = is3_viscous%beg, is3_viscous%end - vL_prim_vf(i)%sf(l, k, j) = vL_z(j, k, l, i) - vR_prim_vf(i)%sf(l, k, j) = vR_z(j, k, l, i) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = iv%beg, iv%end + do j = is1_viscous%beg, is1_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do l = is3_viscous%beg, is3_viscous%end + vL_prim_vf(i)%sf(l, k, j) = vL_z(j, k, l, i) + vR_prim_vf(i)%sf(l, k, j) = vR_z(j, k, l, i) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() elseif (norm_dir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - vL_prim_vf(i)%sf(j, k, l) = vL_x(j, k, l, i) - vR_prim_vf(i)%sf(j, k, l) = vR_x(j, k, l, i) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = iv%beg, iv%end + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + vL_prim_vf(i)%sf(j, k, l) = vL_x(j, k, l, i) + vR_prim_vf(i)%sf(j, k, l) = vR_x(j, k, l, i) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end if end if @@ -1244,23 +1244,23 @@ contains ! cell-boundaries, to calculate the cell-averaged first-order ! spatial derivatives inside the cell. - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dv_ds_vf(i)%sf(j, k, l) = & - 1._wp/((1._wp + wa_flg)*dL(j)) & - *(wa_flg*vL_vf(i)%sf(j + 1, k, l) & - + vR_vf(i)%sf(j, k, l) & - - vL_vf(i)%sf(j, k, l) & - - wa_flg*vR_vf(i)%sf(j - 1, k, l)) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg + 1, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dv_ds_vf(i)%sf(j, k, l) = & + 1._wp/((1._wp + wa_flg)*dL(j)) & + *(wa_flg*vL_vf(i)%sf(j + 1, k, l) & + + vR_vf(i)%sf(j, k, l) & + - vL_vf(i)%sf(j, k, l) & + - wa_flg*vR_vf(i)%sf(j - 1, k, l)) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() ! END: First-Order Spatial Derivatives in x-direction @@ -1273,23 +1273,23 @@ contains ! cell-boundaries, to calculate the cell-averaged first-order ! spatial derivatives inside the cell. - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg + 1, is2_viscous%end - 1 - do j = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dv_ds_vf(i)%sf(j, k, l) = & - 1._wp/((1._wp + wa_flg)*dL(k)) & - *(wa_flg*vL_vf(i)%sf(j, k + 1, l) & - + vR_vf(i)%sf(j, k, l) & - - vL_vf(i)%sf(j, k, l) & - - wa_flg*vR_vf(i)%sf(j, k - 1, l)) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg + 1, is2_viscous%end - 1 + do j = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dv_ds_vf(i)%sf(j, k, l) = & + 1._wp/((1._wp + wa_flg)*dL(k)) & + *(wa_flg*vL_vf(i)%sf(j, k + 1, l) & + + vR_vf(i)%sf(j, k, l) & + - vL_vf(i)%sf(j, k, l) & + - wa_flg*vR_vf(i)%sf(j, k - 1, l)) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() ! END: First-Order Spatial Derivatives in y-direction @@ -1302,23 +1302,23 @@ contains ! cell-boundaries, to calculate the cell-averaged first-order ! spatial derivatives inside the cell. - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg + 1, is3_viscous%end - 1 - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dv_ds_vf(i)%sf(j, k, l) = & - 1._wp/((1._wp + wa_flg)*dL(l)) & - *(wa_flg*vL_vf(i)%sf(j, k, l + 1) & - + vR_vf(i)%sf(j, k, l) & - - vL_vf(i)%sf(j, k, l) & - - wa_flg*vR_vf(i)%sf(j, k, l - 1)) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) + do l = is3_viscous%beg + 1, is3_viscous%end - 1 + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dv_ds_vf(i)%sf(j, k, l) = & + 1._wp/((1._wp + wa_flg)*dL(l)) & + *(wa_flg*vL_vf(i)%sf(j, k, l + 1) & + + vR_vf(i)%sf(j, k, l) & + - vL_vf(i)%sf(j, k, l) & + - wa_flg*vR_vf(i)%sf(j, k, l - 1)) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if ! END: First-Order Spatial Derivatives in z-direction @@ -1358,150 +1358,150 @@ contains $:GPU_UPDATE(device='[is1_viscous,is2_viscous,is3_viscous]') - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - grad_x%sf(j, k, l) = & - (var%sf(j + 1, k, l) - var%sf(j - 1, k, l))/ & - (x_cc(j + 1) - x_cc(j - 1)) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + grad_x%sf(j, k, l) = & + (var%sf(j + 1, k, l) - var%sf(j - 1, k, l))/ & + (x_cc(j + 1) - x_cc(j - 1)) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (n > 0) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - grad_y%sf(j, k, l) = & - (var%sf(j, k + 1, l) - var%sf(j, k - 1, l))/ & - (y_cc(k + 1) - y_cc(k - 1)) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + grad_y%sf(j, k, l) = & + (var%sf(j, k + 1, l) - var%sf(j, k - 1, l))/ & + (y_cc(k + 1) - y_cc(k - 1)) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (p > 0) then - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - grad_z%sf(j, k, l) = & - (var%sf(j, k, l + 1) - var%sf(j, k, l - 1))/ & - (z_cc(l + 1) - z_cc(l - 1)) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + grad_z%sf(j, k, l) = & + (var%sf(j, k, l + 1) - var%sf(j, k, l - 1))/ & + (z_cc(l + 1) - z_cc(l - 1)) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[k,l]', collapse=2) + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + grad_x%sf(idwbuff(1)%beg, k, l) = & + (-3._wp*var%sf(idwbuff(1)%beg, k, l) + 4._wp*var%sf(idwbuff(1)%beg + 1, k, l) - var%sf(idwbuff(1)%beg + 2, k, l))/ & + (x_cc(idwbuff(1)%beg + 2) - x_cc(idwbuff(1)%beg)) + grad_x%sf(idwbuff(1)%end, k, l) = & + (+3._wp*var%sf(idwbuff(1)%end, k, l) - 4._wp*var%sf(idwbuff(1)%end - 1, k, l) + var%sf(idwbuff(1)%end - 2, k, l))/ & + (x_cc(idwbuff(1)%end) - x_cc(idwbuff(1)%end - 2)) + end do + end do + $:END_GPU_PARALLEL_LOOP() + if (n > 0) then + $:GPU_PARALLEL_LOOP(private='[j,l]', collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - grad_x%sf(idwbuff(1)%beg, k, l) = & - (-3._wp*var%sf(idwbuff(1)%beg, k, l) + 4._wp*var%sf(idwbuff(1)%beg + 1, k, l) - var%sf(idwbuff(1)%beg + 2, k, l))/ & - (x_cc(idwbuff(1)%beg + 2) - x_cc(idwbuff(1)%beg)) - grad_x%sf(idwbuff(1)%end, k, l) = & - (+3._wp*var%sf(idwbuff(1)%end, k, l) - 4._wp*var%sf(idwbuff(1)%end - 1, k, l) + var%sf(idwbuff(1)%end - 2, k, l))/ & - (x_cc(idwbuff(1)%end) - x_cc(idwbuff(1)%end - 2)) + do j = idwbuff(1)%beg, idwbuff(1)%end + grad_y%sf(j, idwbuff(2)%beg, l) = & + (-3._wp*var%sf(j, idwbuff(2)%beg, l) + 4._wp*var%sf(j, idwbuff(2)%beg + 1, l) - var%sf(j, idwbuff(2)%beg + 2, l))/ & + (y_cc(idwbuff(2)%beg + 2) - y_cc(idwbuff(2)%beg)) + grad_y%sf(j, idwbuff(2)%end, l) = & + (+3._wp*var%sf(j, idwbuff(2)%end, l) - 4._wp*var%sf(j, idwbuff(2)%end - 1, l) + var%sf(j, idwbuff(2)%end - 2, l))/ & + (y_cc(idwbuff(2)%end) - y_cc(idwbuff(2)%end - 2)) end do end do - #:endcall GPU_PARALLEL_LOOP - if (n > 0) then - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = idwbuff(3)%beg, idwbuff(3)%end + $:END_GPU_PARALLEL_LOOP() + if (p > 0) then + $:GPU_PARALLEL_LOOP(private='[j,k]', collapse=2) + do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end - grad_y%sf(j, idwbuff(2)%beg, l) = & - (-3._wp*var%sf(j, idwbuff(2)%beg, l) + 4._wp*var%sf(j, idwbuff(2)%beg + 1, l) - var%sf(j, idwbuff(2)%beg + 2, l))/ & - (y_cc(idwbuff(2)%beg + 2) - y_cc(idwbuff(2)%beg)) - grad_y%sf(j, idwbuff(2)%end, l) = & - (+3._wp*var%sf(j, idwbuff(2)%end, l) - 4._wp*var%sf(j, idwbuff(2)%end - 1, l) + var%sf(j, idwbuff(2)%end - 2, l))/ & - (y_cc(idwbuff(2)%end) - y_cc(idwbuff(2)%end - 2)) + grad_z%sf(j, k, idwbuff(3)%beg) = & + (-3._wp*var%sf(j, k, idwbuff(3)%beg) + 4._wp*var%sf(j, k, idwbuff(3)%beg + 1) - var%sf(j, k, idwbuff(3)%beg + 2))/ & + (z_cc(idwbuff(3)%beg + 2) - z_cc(is3_viscous%beg)) + grad_z%sf(j, k, idwbuff(3)%end) = & + (+3._wp*var%sf(j, k, idwbuff(3)%end) - 4._wp*var%sf(j, k, idwbuff(3)%end - 1) + var%sf(j, k, idwbuff(3)%end - 2))/ & + (z_cc(idwbuff(3)%end) - z_cc(idwbuff(3)%end - 2)) end do end do - #:endcall GPU_PARALLEL_LOOP - if (p > 0) then - #:call GPU_PARALLEL_LOOP(collapse=2) - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_z%sf(j, k, idwbuff(3)%beg) = & - (-3._wp*var%sf(j, k, idwbuff(3)%beg) + 4._wp*var%sf(j, k, idwbuff(3)%beg + 1) - var%sf(j, k, idwbuff(3)%beg + 2))/ & - (z_cc(idwbuff(3)%beg + 2) - z_cc(is3_viscous%beg)) - grad_z%sf(j, k, idwbuff(3)%end) = & - (+3._wp*var%sf(j, k, idwbuff(3)%end) - 4._wp*var%sf(j, k, idwbuff(3)%end - 1) + var%sf(j, k, idwbuff(3)%end - 2))/ & - (z_cc(idwbuff(3)%end) - z_cc(idwbuff(3)%end - 2)) - end do - end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if if (bc_x%beg <= BC_GHOST_EXTRAP) then - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - grad_x%sf(0, k, l) = (-3._wp*var%sf(0, k, l) + 4._wp*var%sf(1, k, l) - var%sf(2, k, l))/ & - (x_cc(2) - x_cc(0)) - end do + $:GPU_PARALLEL_LOOP(private='[k,l]', collapse=2) + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + grad_x%sf(0, k, l) = (-3._wp*var%sf(0, k, l) + 4._wp*var%sf(1, k, l) - var%sf(2, k, l))/ & + (x_cc(2) - x_cc(0)) end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (bc_x%end <= BC_GHOST_EXTRAP) then - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - grad_x%sf(m, k, l) = (3._wp*var%sf(m, k, l) - 4._wp*var%sf(m - 1, k, l) + var%sf(m - 2, k, l))/ & - (x_cc(m) - x_cc(m - 2)) - end do + $:GPU_PARALLEL_LOOP(private='[k,l]', collapse=2) + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + grad_x%sf(m, k, l) = (3._wp*var%sf(m, k, l) - 4._wp*var%sf(m - 1, k, l) + var%sf(m - 2, k, l))/ & + (x_cc(m) - x_cc(m - 2)) end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (n > 0) then if (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_NULL) then - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = idwbuff(3)%beg, idwbuff(3)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_y%sf(j, 0, l) = (-3._wp*var%sf(j, 0, l) + 4._wp*var%sf(j, 1, l) - var%sf(j, 2, l))/ & - (y_cc(2) - y_cc(0)) - end do + $:GPU_PARALLEL_LOOP(private='[j,l]', collapse=2) + do l = idwbuff(3)%beg, idwbuff(3)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + grad_y%sf(j, 0, l) = (-3._wp*var%sf(j, 0, l) + 4._wp*var%sf(j, 1, l) - var%sf(j, 2, l))/ & + (y_cc(2) - y_cc(0)) end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (bc_y%end <= BC_GHOST_EXTRAP) then - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = idwbuff(3)%beg, idwbuff(3)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_y%sf(j, n, l) = (3._wp*var%sf(j, n, l) - 4._wp*var%sf(j, n - 1, l) + var%sf(j, n - 2, l))/ & - (y_cc(n) - y_cc(n - 2)) - end do + $:GPU_PARALLEL_LOOP(private='[j,l]', collapse=2) + do l = idwbuff(3)%beg, idwbuff(3)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + grad_y%sf(j, n, l) = (3._wp*var%sf(j, n, l) - 4._wp*var%sf(j, n - 1, l) + var%sf(j, n - 2, l))/ & + (y_cc(n) - y_cc(n - 2)) end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (p > 0) then if (bc_z%beg <= BC_GHOST_EXTRAP) then - #:call GPU_PARALLEL_LOOP(collapse=2) - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_z%sf(j, k, 0) = & - (-3._wp*var%sf(j, k, 0) + 4._wp*var%sf(j, k, 1) - var%sf(j, k, 2))/ & - (z_cc(2) - z_cc(0)) - end do + $:GPU_PARALLEL_LOOP(private='[j,k]', collapse=2) + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + grad_z%sf(j, k, 0) = & + (-3._wp*var%sf(j, k, 0) + 4._wp*var%sf(j, k, 1) - var%sf(j, k, 2))/ & + (z_cc(2) - z_cc(0)) end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if if (bc_z%end <= BC_GHOST_EXTRAP) then - #:call GPU_PARALLEL_LOOP(collapse=2) - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_z%sf(j, k, p) = & - (3._wp*var%sf(j, k, p) - 4._wp*var%sf(j, k, p - 1) + var%sf(j, k, p - 2))/ & - (z_cc(p) - z_cc(p - 2)) - end do + $:GPU_PARALLEL_LOOP(private='[j,k]', collapse=2) + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + grad_z%sf(j, k, p) = & + (3._wp*var%sf(j, k, p) - 4._wp*var%sf(j, k, p - 1) + var%sf(j, k, p - 2))/ & + (z_cc(p) - z_cc(p - 2)) end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end if end if diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 6874237a4e..22d9add5dc 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -670,237 +670,237 @@ contains if (weno_order == 1) then if (weno_dir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, ubound(v_vf, 1) - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - vL_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - vR_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, ubound(v_vf, 1) + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + vL_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l) + vR_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() else if (weno_dir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, ubound(v_vf, 1) - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - vL_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - vR_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, ubound(v_vf, 1) + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + vL_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + vR_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() else if (weno_dir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, ubound(v_vf, 1) - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - vL_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - vR_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = 1, ubound(v_vf, 1) + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + vL_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + vR_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if elseif (weno_order == 3) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then - #:call GPU_PARALLEL_LOOP(collapse=4,private='[beta,dvd,poly,omega,alpha,tau]') - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - do i = 1, v_size - ! reconstruct from left side + $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,beta,dvd,poly,omega,alpha,tau]') + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + do i = 1, v_size + ! reconstruct from left side + + dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & + - v_rs_ws_${XYZ}$ (j, k, l, i) + dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + - v_rs_ws_${XYZ}$ (j - 1, k, l, i) + + poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(0) + poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(-1) + + beta(0) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(0)*dvd(0) & + + weno_eps + beta(1) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(-1)*dvd(-1) & + + weno_eps + + if (wenojs) then + alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + + elseif (mapped_weno) then + alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + omega = alpha/sum(alpha) + alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & + *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) - dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & - - v_rs_ws_${XYZ}$ (j, k, l, i) - dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - - v_rs_ws_${XYZ}$ (j - 1, k, l, i) + elseif (wenoz) then + ! Borges, et al. (2008) - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(0) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(-1) + tau = abs(beta(1) - beta(0)) + alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + tau/beta) - beta(0) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(0)*dvd(0) & - + weno_eps - beta(1) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(-1)*dvd(-1) & - + weno_eps + end if - if (wenojs) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + omega = alpha/sum(alpha) - elseif (mapped_weno) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) - omega = alpha/sum(alpha) - alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) + vL_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) - elseif (wenoz) then - ! Borges, et al. (2008) + ! reconstruct from right side - tau = abs(beta(1) - beta(0)) - alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + tau/beta) + poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(0) + poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(-1) - end if + if (wenojs) then + alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + elseif (mapped_weno) then + alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) omega = alpha/sum(alpha) + alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & + *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) - vL_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + elseif (wenoz) then - ! reconstruct from right side + alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + tau/beta) - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(0) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(-1) + end if - if (wenojs) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + omega = alpha/sum(alpha) - elseif (mapped_weno) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) - omega = alpha/sum(alpha) - alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) - - elseif (wenoz) then - - alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + tau/beta) - - end if - - omega = alpha/sum(alpha) + vR_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) - vR_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) - - end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if #:endfor elseif (weno_order == 5) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then - #:call GPU_PARALLEL_LOOP(collapse=3,private='[dvd,poly,beta,alpha,omega,tau,delta]') - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - $:GPU_LOOP(parallelism='[seq]') - do i = 1, v_size - ! reconstruct from left side + $:GPU_PARALLEL_LOOP(collapse=3,private='[i,j,k,l,dvd,poly,beta,alpha,omega,tau,delta]') + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + $:GPU_LOOP(parallelism='[seq]') + do i = 1, v_size + ! reconstruct from left side + + dvd(1) = v_rs_ws_${XYZ}$ (j + 2, k, l, i) & + - v_rs_ws_${XYZ}$ (j + 1, k, l, i) + dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & + - v_rs_ws_${XYZ}$ (j, k, l, i) + dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + - v_rs_ws_${XYZ}$ (j - 1, k, l, i) + dvd(-2) = v_rs_ws_${XYZ}$ (j - 1, k, l, i) & + - v_rs_ws_${XYZ}$ (j - 2, k, l, i) + + poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(1) & + + poly_coef_cbL_${XYZ}$ (j, 0, 1)*dvd(0) + poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(0) & + + poly_coef_cbL_${XYZ}$ (j, 1, 1)*dvd(-1) + poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 2, 0)*dvd(-1) & + + poly_coef_cbL_${XYZ}$ (j, 2, 1)*dvd(-2) + + beta(0) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(1)*dvd(1) & + + beta_coef_${XYZ}$ (j, 0, 1)*dvd(1)*dvd(0) & + + beta_coef_${XYZ}$ (j, 0, 2)*dvd(0)*dvd(0) & + + weno_eps + beta(1) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(0)*dvd(0) & + + beta_coef_${XYZ}$ (j, 1, 1)*dvd(0)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 1, 2)*dvd(-1)*dvd(-1) & + + weno_eps + beta(2) = beta_coef_${XYZ}$ (j, 2, 0)*dvd(-1)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 2, 1)*dvd(-1)*dvd(-2) & + + beta_coef_${XYZ}$ (j, 2, 2)*dvd(-2)*dvd(-2) & + + weno_eps + + if (wenojs) then + alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + + elseif (mapped_weno) then + alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + omega = alpha/sum(alpha) + alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & + *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) - dvd(1) = v_rs_ws_${XYZ}$ (j + 2, k, l, i) & - - v_rs_ws_${XYZ}$ (j + 1, k, l, i) - dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & - - v_rs_ws_${XYZ}$ (j, k, l, i) - dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - - v_rs_ws_${XYZ}$ (j - 1, k, l, i) - dvd(-2) = v_rs_ws_${XYZ}$ (j - 1, k, l, i) & - - v_rs_ws_${XYZ}$ (j - 2, k, l, i) + elseif (wenoz) then + ! Borges, et al. (2008) - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(1) & - + poly_coef_cbL_${XYZ}$ (j, 0, 1)*dvd(0) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(0) & - + poly_coef_cbL_${XYZ}$ (j, 1, 1)*dvd(-1) - poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 2, 0)*dvd(-1) & - + poly_coef_cbL_${XYZ}$ (j, 2, 1)*dvd(-2) + tau = abs(beta(2) - beta(0)) ! Equation 25 + alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + tau/beta) ! Equation 28 (note: weno_eps was already added to beta) - beta(0) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(1)*dvd(1) & - + beta_coef_${XYZ}$ (j, 0, 1)*dvd(1)*dvd(0) & - + beta_coef_${XYZ}$ (j, 0, 2)*dvd(0)*dvd(0) & - + weno_eps - beta(1) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(0)*dvd(0) & - + beta_coef_${XYZ}$ (j, 1, 1)*dvd(0)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 1, 2)*dvd(-1)*dvd(-1) & - + weno_eps - beta(2) = beta_coef_${XYZ}$ (j, 2, 0)*dvd(-1)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 2, 1)*dvd(-1)*dvd(-2) & - + beta_coef_${XYZ}$ (j, 2, 2)*dvd(-2)*dvd(-2) & - + weno_eps + elseif (teno) then + ! Fu, et al. (2016) + ! Fu''s code: https://dx.doi.org/10.13140/RG.2.2.36250.34247 + tau = abs(beta(2) - beta(0)) + alpha = 1._wp + tau/beta ! Equation 22 (reuse alpha as gamma; pick C=1 & q=6) + alpha = (alpha*alpha*alpha)**2._wp ! Equation 22 cont. (some CPU compilers cannot optimize x**6.0) + omega = alpha/sum(alpha) ! Equation 25 (reuse omega as xi) + delta = merge(0._wp, 1._wp, omega < teno_CT)! Equation 26 + alpha = delta*d_cbL_${XYZ}$ (:, j) ! Equation 27 - if (wenojs) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + end if - elseif (mapped_weno) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) - omega = alpha/sum(alpha) - alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) + omega = alpha/sum(alpha) - elseif (wenoz) then - ! Borges, et al. (2008) + vL_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) - tau = abs(beta(2) - beta(0)) ! Equation 25 - alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + tau/beta) ! Equation 28 (note: weno_eps was already added to beta) + ! reconstruct from right side - elseif (teno) then - ! Fu, et al. (2016) - ! Fu''s code: https://dx.doi.org/10.13140/RG.2.2.36250.34247 - tau = abs(beta(2) - beta(0)) - alpha = 1._wp + tau/beta ! Equation 22 (reuse alpha as gamma; pick C=1 & q=6) - alpha = (alpha*alpha*alpha)**2._wp ! Equation 22 cont. (some CPU compilers cannot optimize x**6.0) - omega = alpha/sum(alpha) ! Equation 25 (reuse omega as xi) - delta = merge(0._wp, 1._wp, omega < teno_CT)! Equation 26 - alpha = delta*d_cbL_${XYZ}$ (:, j) ! Equation 27 + poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(1) & + + poly_coef_cbR_${XYZ}$ (j, 0, 1)*dvd(0) + poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(0) & + + poly_coef_cbR_${XYZ}$ (j, 1, 1)*dvd(-1) + poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 2, 0)*dvd(-1) & + + poly_coef_cbR_${XYZ}$ (j, 2, 1)*dvd(-2) - end if + if (wenojs) then + alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + elseif (mapped_weno) then + alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) omega = alpha/sum(alpha) + alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & + *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) - vL_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) - - ! reconstruct from right side - - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(1) & - + poly_coef_cbR_${XYZ}$ (j, 0, 1)*dvd(0) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(0) & - + poly_coef_cbR_${XYZ}$ (j, 1, 1)*dvd(-1) - poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 2, 0)*dvd(-1) & - + poly_coef_cbR_${XYZ}$ (j, 2, 1)*dvd(-2) - - if (wenojs) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) - - elseif (mapped_weno) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) - omega = alpha/sum(alpha) - alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) + elseif (wenoz) then - elseif (wenoz) then + alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + tau/beta) - alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + tau/beta) + elseif (teno) then + alpha = delta*d_cbR_${XYZ}$ (:, j) - elseif (teno) then - alpha = delta*d_cbR_${XYZ}$ (:, j) + end if - end if + omega = alpha/sum(alpha) - omega = alpha/sum(alpha) - - vR_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) + vR_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) - end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() if (mp_weno) then call s_preserve_monotonicity(v_rs_ws_${XYZ}$, vL_rs_vf_${XYZ}$, & @@ -911,192 +911,192 @@ contains elseif (weno_order == 7) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then - #:call GPU_PARALLEL_LOOP(collapse=3,private='[poly,beta,alpha,omega,tau,delta,dvd,v]') - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - $:GPU_LOOP(parallelism='[seq]') - do i = 1, v_size - - if (teno) v = v_rs_ws_${XYZ}$ (j - 3:j + 3, k, l, i) ! temporary field value array for clarity - - if (.not. teno) then - dvd(2) = v_rs_ws_${XYZ}$ (j + 3, k, l, i) & - - v_rs_ws_${XYZ}$ (j + 2, k, l, i) - dvd(1) = v_rs_ws_${XYZ}$ (j + 2, k, l, i) & - - v_rs_ws_${XYZ}$ (j + 1, k, l, i) - dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & - - v_rs_ws_${XYZ}$ (j, k, l, i) - dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - - v_rs_ws_${XYZ}$ (j - 1, k, l, i) - dvd(-2) = v_rs_ws_${XYZ}$ (j - 1, k, l, i) & - - v_rs_ws_${XYZ}$ (j - 2, k, l, i) - dvd(-3) = v_rs_ws_${XYZ}$ (j - 2, k, l, i) & - - v_rs_ws_${XYZ}$ (j - 3, k, l, i) - - poly(3) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(2) & - + poly_coef_cbL_${XYZ}$ (j, 0, 1)*dvd(1) & - + poly_coef_cbL_${XYZ}$ (j, 0, 2)*dvd(0) - poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(1) & - + poly_coef_cbL_${XYZ}$ (j, 1, 1)*dvd(0) & - + poly_coef_cbL_${XYZ}$ (j, 1, 2)*dvd(-1) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 2, 0)*dvd(0) & - + poly_coef_cbL_${XYZ}$ (j, 2, 1)*dvd(-1) & - + poly_coef_cbL_${XYZ}$ (j, 2, 2)*dvd(-2) - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 3, 0)*dvd(-1) & - + poly_coef_cbL_${XYZ}$ (j, 3, 1)*dvd(-2) & - + poly_coef_cbL_${XYZ}$ (j, 3, 2)*dvd(-3) - - else - ! (Fu, et al., 2016) Table 1 - ! Note: Unlike TENO5, TENO7 stencils differ from WENO7 stencils - ! See Figure 2 (right) for right-sided flux (at i+1/2) - ! Here we need the left-sided flux, so we flip the weights with respect to the x=i point - ! But we need to keep the stencil order to reuse the beta coefficients - poly(0) = ( 2._wp*v(-1) + 5._wp*v( 0) - 1._wp*v( 1)) / 6._wp !& - poly(1) = (11._wp*v( 0) - 7._wp*v( 1) + 2._wp*v( 2)) / 6._wp !& - poly(2) = (-1._wp*v(-2) + 5._wp*v(-1) + 2._wp*v( 0)) / 6._wp !& - poly(3) = (25._wp*v( 0) - 23._wp*v( 1) + 13._wp*v( 2) - 3._wp*v( 3)) / 12._wp !& - poly(4) = ( 1._wp*v(-3) - 5._wp*v(-2) + 13._wp*v(-1) + 3._wp*v( 0)) / 12._wp !& - end if - - if (.not. teno) then - - beta(3) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(2)*dvd(2) & - + beta_coef_${XYZ}$ (j, 0, 1)*dvd(2)*dvd(1) & - + beta_coef_${XYZ}$ (j, 0, 2)*dvd(2)*dvd(0) & - + beta_coef_${XYZ}$ (j, 0, 3)*dvd(1)*dvd(1) & - + beta_coef_${XYZ}$ (j, 0, 4)*dvd(1)*dvd(0) & - + beta_coef_${XYZ}$ (j, 0, 5)*dvd(0)*dvd(0) & - + weno_eps - - beta(2) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(1)*dvd(1) & - + beta_coef_${XYZ}$ (j, 1, 1)*dvd(1)*dvd(0) & - + beta_coef_${XYZ}$ (j, 1, 2)*dvd(1)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 1, 3)*dvd(0)*dvd(0) & - + beta_coef_${XYZ}$ (j, 1, 4)*dvd(0)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 1, 5)*dvd(-1)*dvd(-1) & - + weno_eps - - beta(1) = beta_coef_${XYZ}$ (j, 2, 0)*dvd(0)*dvd(0) & - + beta_coef_${XYZ}$ (j, 2, 1)*dvd(0)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 2, 2)*dvd(0)*dvd(-2) & - + beta_coef_${XYZ}$ (j, 2, 3)*dvd(-1)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 2, 4)*dvd(-1)*dvd(-2) & - + beta_coef_${XYZ}$ (j, 2, 5)*dvd(-2)*dvd(-2) & - + weno_eps - - beta(0) = beta_coef_${XYZ}$ (j, 3, 0)*dvd(-1)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 3, 1)*dvd(-1)*dvd(-2) & - + beta_coef_${XYZ}$ (j, 3, 2)*dvd(-1)*dvd(-3) & - + beta_coef_${XYZ}$ (j, 3, 3)*dvd(-2)*dvd(-2) & - + beta_coef_${XYZ}$ (j, 3, 4)*dvd(-2)*dvd(-3) & - + beta_coef_${XYZ}$ (j, 3, 5)*dvd(-3)*dvd(-3) & - + weno_eps - - else ! TENO - ! High-Order Low-Dissipation Targeted ENO Schemes for Ideal Magnetohydrodynamics (Fu & Tang, 2019) Section 3.2 - beta(0) = 13._wp/12._wp*(v(-1) - 2._wp*v( 0) + v( 1))**2._wp + (( v(-1) - v( 1))**2._wp)/4._wp + weno_eps !& - beta(1) = 13._wp/12._wp*(v( 0) - 2._wp*v( 1) + v( 2))**2._wp + ((3._wp*v( 0) - 4._wp*v( 1) + v( 2))**2._wp)/4._wp + weno_eps !& - beta(2) = 13._wp/12._wp*(v(-2) - 2._wp*v(-1) + v( 0))**2._wp + (( v(-2) - 4._wp*v(-1) + 3._wp*v( 0))**2._wp)/4._wp + weno_eps !& - - beta(3) = ( v( 0)*(2107._wp*v( 0) - 9402._wp*v( 1) + 7042._wp*v( 2) - 1854._wp*v( 3)) & !& - + v( 1)*( 11003._wp*v( 1) - 17246._wp*v( 2) + 4642._wp*v( 3)) & !& - + v( 2)*( 7043._wp*v( 2) - 3882._wp*v( 3)) & !& - + v( 3)*( 547._wp*v( 3)) ) / 240._wp & !& - + weno_eps !& - - beta(4) = ( v(-3)*(547._wp*v(-3) - 3882._wp*v(-2) + 4642._wp*v(-1) - 1854._wp*v( 0)) & !& - + v(-2)*( 7043._wp*v(-2) - 17246._wp*v(-1) + 7042._wp*v( 0)) & !& - + v(-1)*( 11003._wp*v(-1) - 9402._wp*v( 0)) & !& - + v( 0)*( 2107._wp*v( 0)) ) / 240._wp & !& - + weno_eps !& - end if - - if (wenojs) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) - - elseif (mapped_weno) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) - omega = alpha/sum(alpha) - alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) - - elseif (wenoz) then - ! Castro, et al. (2010) - ! Don & Borges (2013) also helps - tau = abs(beta(3) - beta(0)) ! Equation 50 - alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + (tau/beta)**wenoz_q) ! q = 2,3,4 for stability - - elseif (teno) then - tau = abs(beta(4) - beta(3)) ! Note the reordering of stencils - alpha = 1._wp + tau/beta - alpha = (alpha*alpha*alpha)**2._wp ! some CPU compilers cannot optimize x**6.0 - omega = alpha/sum(alpha) - delta = merge(0._wp, 1._wp, omega < teno_CT) - alpha = delta*d_cbL_${XYZ}$ (:, j) - - end if + $:GPU_PARALLEL_LOOP(collapse=3,private='[i,j,k,l,poly,beta,alpha,omega,tau,delta,dvd,v]') + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + $:GPU_LOOP(parallelism='[seq]') + do i = 1, v_size + + if (teno) v = v_rs_ws_${XYZ}$ (j - 3:j + 3, k, l, i) ! temporary field value array for clarity + + if (.not. teno) then + dvd(2) = v_rs_ws_${XYZ}$ (j + 3, k, l, i) & + - v_rs_ws_${XYZ}$ (j + 2, k, l, i) + dvd(1) = v_rs_ws_${XYZ}$ (j + 2, k, l, i) & + - v_rs_ws_${XYZ}$ (j + 1, k, l, i) + dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & + - v_rs_ws_${XYZ}$ (j, k, l, i) + dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + - v_rs_ws_${XYZ}$ (j - 1, k, l, i) + dvd(-2) = v_rs_ws_${XYZ}$ (j - 1, k, l, i) & + - v_rs_ws_${XYZ}$ (j - 2, k, l, i) + dvd(-3) = v_rs_ws_${XYZ}$ (j - 2, k, l, i) & + - v_rs_ws_${XYZ}$ (j - 3, k, l, i) + poly(3) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(2) & + + poly_coef_cbL_${XYZ}$ (j, 0, 1)*dvd(1) & + + poly_coef_cbL_${XYZ}$ (j, 0, 2)*dvd(0) + poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(1) & + + poly_coef_cbL_${XYZ}$ (j, 1, 1)*dvd(0) & + + poly_coef_cbL_${XYZ}$ (j, 1, 2)*dvd(-1) + poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 2, 0)*dvd(0) & + + poly_coef_cbL_${XYZ}$ (j, 2, 1)*dvd(-1) & + + poly_coef_cbL_${XYZ}$ (j, 2, 2)*dvd(-2) + poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 3, 0)*dvd(-1) & + + poly_coef_cbL_${XYZ}$ (j, 3, 1)*dvd(-2) & + + poly_coef_cbL_${XYZ}$ (j, 3, 2)*dvd(-3) + + else + ! (Fu, et al., 2016) Table 1 + ! Note: Unlike TENO5, TENO7 stencils differ from WENO7 stencils + ! See Figure 2 (right) for right-sided flux (at i+1/2) + ! Here we need the left-sided flux, so we flip the weights with respect to the x=i point + ! But we need to keep the stencil order to reuse the beta coefficients + poly(0) = ( 2._wp*v(-1) + 5._wp*v( 0) - 1._wp*v( 1)) / 6._wp !& + poly(1) = (11._wp*v( 0) - 7._wp*v( 1) + 2._wp*v( 2)) / 6._wp !& + poly(2) = (-1._wp*v(-2) + 5._wp*v(-1) + 2._wp*v( 0)) / 6._wp !& + poly(3) = (25._wp*v( 0) - 23._wp*v( 1) + 13._wp*v( 2) - 3._wp*v( 3)) / 12._wp !& + poly(4) = ( 1._wp*v(-3) - 5._wp*v(-2) + 13._wp*v(-1) + 3._wp*v( 0)) / 12._wp !& + end if + + if (.not. teno) then + + beta(3) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(2)*dvd(2) & + + beta_coef_${XYZ}$ (j, 0, 1)*dvd(2)*dvd(1) & + + beta_coef_${XYZ}$ (j, 0, 2)*dvd(2)*dvd(0) & + + beta_coef_${XYZ}$ (j, 0, 3)*dvd(1)*dvd(1) & + + beta_coef_${XYZ}$ (j, 0, 4)*dvd(1)*dvd(0) & + + beta_coef_${XYZ}$ (j, 0, 5)*dvd(0)*dvd(0) & + + weno_eps + + beta(2) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(1)*dvd(1) & + + beta_coef_${XYZ}$ (j, 1, 1)*dvd(1)*dvd(0) & + + beta_coef_${XYZ}$ (j, 1, 2)*dvd(1)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 1, 3)*dvd(0)*dvd(0) & + + beta_coef_${XYZ}$ (j, 1, 4)*dvd(0)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 1, 5)*dvd(-1)*dvd(-1) & + + weno_eps + + beta(1) = beta_coef_${XYZ}$ (j, 2, 0)*dvd(0)*dvd(0) & + + beta_coef_${XYZ}$ (j, 2, 1)*dvd(0)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 2, 2)*dvd(0)*dvd(-2) & + + beta_coef_${XYZ}$ (j, 2, 3)*dvd(-1)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 2, 4)*dvd(-1)*dvd(-2) & + + beta_coef_${XYZ}$ (j, 2, 5)*dvd(-2)*dvd(-2) & + + weno_eps + + beta(0) = beta_coef_${XYZ}$ (j, 3, 0)*dvd(-1)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 3, 1)*dvd(-1)*dvd(-2) & + + beta_coef_${XYZ}$ (j, 3, 2)*dvd(-1)*dvd(-3) & + + beta_coef_${XYZ}$ (j, 3, 3)*dvd(-2)*dvd(-2) & + + beta_coef_${XYZ}$ (j, 3, 4)*dvd(-2)*dvd(-3) & + + beta_coef_${XYZ}$ (j, 3, 5)*dvd(-3)*dvd(-3) & + + weno_eps + + else ! TENO + ! High-Order Low-Dissipation Targeted ENO Schemes for Ideal Magnetohydrodynamics (Fu & Tang, 2019) Section 3.2 + beta(0) = 13._wp/12._wp*(v(-1) - 2._wp*v( 0) + v( 1))**2._wp + (( v(-1) - v( 1))**2._wp)/4._wp + weno_eps !& + beta(1) = 13._wp/12._wp*(v( 0) - 2._wp*v( 1) + v( 2))**2._wp + ((3._wp*v( 0) - 4._wp*v( 1) + v( 2))**2._wp)/4._wp + weno_eps !& + beta(2) = 13._wp/12._wp*(v(-2) - 2._wp*v(-1) + v( 0))**2._wp + (( v(-2) - 4._wp*v(-1) + 3._wp*v( 0))**2._wp)/4._wp + weno_eps !& + + beta(3) = ( v( 0)*(2107._wp*v( 0) - 9402._wp*v( 1) + 7042._wp*v( 2) - 1854._wp*v( 3)) & !& + + v( 1)*( 11003._wp*v( 1) - 17246._wp*v( 2) + 4642._wp*v( 3)) & !& + + v( 2)*( 7043._wp*v( 2) - 3882._wp*v( 3)) & !& + + v( 3)*( 547._wp*v( 3)) ) / 240._wp & !& + + weno_eps !& + + beta(4) = ( v(-3)*(547._wp*v(-3) - 3882._wp*v(-2) + 4642._wp*v(-1) - 1854._wp*v( 0)) & !& + + v(-2)*( 7043._wp*v(-2) - 17246._wp*v(-1) + 7042._wp*v( 0)) & !& + + v(-1)*( 11003._wp*v(-1) - 9402._wp*v( 0)) & !& + + v( 0)*( 2107._wp*v( 0)) ) / 240._wp & !& + + weno_eps !& + end if + + if (wenojs) then + alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + + elseif (mapped_weno) then + alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) omega = alpha/sum(alpha) + alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & + *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) + + elseif (wenoz) then + ! Castro, et al. (2010) + ! Don & Borges (2013) also helps + tau = abs(beta(3) - beta(0)) ! Equation 50 + alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + (tau/beta)**wenoz_q) ! q = 2,3,4 for stability + + elseif (teno) then + tau = abs(beta(4) - beta(3)) ! Note the reordering of stencils + alpha = 1._wp + tau/beta + alpha = (alpha*alpha*alpha)**2._wp ! some CPU compilers cannot optimize x**6.0 + omega = alpha/sum(alpha) + delta = merge(0._wp, 1._wp, omega < teno_CT) + alpha = delta*d_cbL_${XYZ}$ (:, j) + + end if + + omega = alpha/sum(alpha) - vL_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) - - if (.not. teno) then - poly(3) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(2) & - + poly_coef_cbR_${XYZ}$ (j, 0, 1)*dvd(1) & - + poly_coef_cbR_${XYZ}$ (j, 0, 2)*dvd(0) - poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(1) & - + poly_coef_cbR_${XYZ}$ (j, 1, 1)*dvd(0) & - + poly_coef_cbR_${XYZ}$ (j, 1, 2)*dvd(-1) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 2, 0)*dvd(0) & - + poly_coef_cbR_${XYZ}$ (j, 2, 1)*dvd(-1) & - + poly_coef_cbR_${XYZ}$ (j, 2, 2)*dvd(-2) - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 3, 0)*dvd(-1) & - + poly_coef_cbR_${XYZ}$ (j, 3, 1)*dvd(-2) & - + poly_coef_cbR_${XYZ}$ (j, 3, 2)*dvd(-3) - else - poly(0) = (-1._wp*v(-1) + 5._wp*v( 0) + 2._wp*v( 1)) / 6._wp !& - poly(1) = ( 2._wp*v( 0) + 5._wp*v( 1) - 1._wp*v( 2)) / 6._wp !& - poly(2) = ( 2._wp*v(-2) - 7._wp*v(-1) + 11._wp*v( 0)) / 6._wp !& - poly(3) = ( 3._wp*v( 0) + 13._wp*v( 1) - 5._wp*v( 2) + 1._wp*v( 3)) / 12._wp !& - poly(4) = (-3._wp*v(-3) + 13._wp*v(-2) - 23._wp*v(-1) + 25._wp*v( 0)) / 12._wp !& - end if - - if (wenojs) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) - - elseif (mapped_weno) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) - omega = alpha/sum(alpha) - alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) - - elseif (wenoz) then - alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + (tau/beta)**wenoz_q) - - elseif (teno) then - alpha = delta*d_cbR_${XYZ}$ (:, j) - - end if + vL_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) + if (.not. teno) then + poly(3) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(2) & + + poly_coef_cbR_${XYZ}$ (j, 0, 1)*dvd(1) & + + poly_coef_cbR_${XYZ}$ (j, 0, 2)*dvd(0) + poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(1) & + + poly_coef_cbR_${XYZ}$ (j, 1, 1)*dvd(0) & + + poly_coef_cbR_${XYZ}$ (j, 1, 2)*dvd(-1) + poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 2, 0)*dvd(0) & + + poly_coef_cbR_${XYZ}$ (j, 2, 1)*dvd(-1) & + + poly_coef_cbR_${XYZ}$ (j, 2, 2)*dvd(-2) + poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 3, 0)*dvd(-1) & + + poly_coef_cbR_${XYZ}$ (j, 3, 1)*dvd(-2) & + + poly_coef_cbR_${XYZ}$ (j, 3, 2)*dvd(-3) + else + poly(0) = (-1._wp*v(-1) + 5._wp*v( 0) + 2._wp*v( 1)) / 6._wp !& + poly(1) = ( 2._wp*v( 0) + 5._wp*v( 1) - 1._wp*v( 2)) / 6._wp !& + poly(2) = ( 2._wp*v(-2) - 7._wp*v(-1) + 11._wp*v( 0)) / 6._wp !& + poly(3) = ( 3._wp*v( 0) + 13._wp*v( 1) - 5._wp*v( 2) + 1._wp*v( 3)) / 12._wp !& + poly(4) = (-3._wp*v(-3) + 13._wp*v(-2) - 23._wp*v(-1) + 25._wp*v( 0)) / 12._wp !& + end if + + if (wenojs) then + alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + + elseif (mapped_weno) then + alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) omega = alpha/sum(alpha) + alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & + *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) + + elseif (wenoz) then + alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + (tau/beta)**wenoz_q) + + elseif (teno) then + alpha = delta*d_cbR_${XYZ}$ (:, j) - vR_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) + end if + + omega = alpha/sum(alpha) + + vR_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) - end do end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if #:endfor @@ -1139,51 +1139,51 @@ contains $:GPU_UPDATE(device='[v_size]') if (weno_dir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do j = 1, v_size - do q = is3_weno%beg, is3_weno%end - do l = is2_weno%beg, is2_weno%end - do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn - v_rs_ws_x(k, l, q, j) = v_vf(j)%sf(k, l, q) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=4) + do j = 1, v_size + do q = is3_weno%beg, is3_weno%end + do l = is2_weno%beg, is2_weno%end + do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn + v_rs_ws_x(k, l, q, j) = v_vf(j)%sf(k, l, q) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if ! Reshaping/Projecting onto Characteristic Fields in y-direction if (n == 0) return if (weno_dir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do j = 1, v_size - do q = is3_weno%beg, is3_weno%end - do l = is2_weno%beg, is2_weno%end - do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn - v_rs_ws_y(k, l, q, j) = v_vf(j)%sf(l, k, q) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=4) + do j = 1, v_size + do q = is3_weno%beg, is3_weno%end + do l = is2_weno%beg, is2_weno%end + do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn + v_rs_ws_y(k, l, q, j) = v_vf(j)%sf(l, k, q) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if ! Reshaping/Projecting onto Characteristic Fields in z-direction if (p == 0) return if (weno_dir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do j = 1, v_size - do q = is3_weno%beg, is3_weno%end - do l = is2_weno%beg, is2_weno%end - do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn - v_rs_ws_z(k, l, q, j) = v_vf(j)%sf(q, l, k) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=4) + do j = 1, v_size + do q = is3_weno%beg, is3_weno%end + do l = is2_weno%beg, is2_weno%end + do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn + v_rs_ws_z(k, l, q, j) = v_vf(j)%sf(q, l, k) end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_initialize_weno @@ -1233,132 +1233,132 @@ contains real(wp), parameter :: alpha_mp = 2._wp real(wp), parameter :: beta_mp = 4._wp/3._wp - #:call GPU_PARALLEL_LOOP(collapse=4,private='[d]') - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - do i = 1, v_size - d(-1) = v_rs_ws(j, k, l, i) & - + v_rs_ws(j - 2, k, l, i) & - - v_rs_ws(j - 1, k, l, i) & - *2._wp - d(0) = v_rs_ws(j + 1, k, l, i) & - + v_rs_ws(j - 1, k, l, i) & - - v_rs_ws(j, k, l, i) & - *2._wp - d(1) = v_rs_ws(j + 2, k, l, i) & - + v_rs_ws(j, k, l, i) & - - v_rs_ws(j + 1, k, l, i) & - *2._wp - - d_MD = (sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, 4._wp*d(0) - d(-1))) & - *abs((sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(-1))) & - *(sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(0)))) & - *min(abs(4._wp*d(-1) - d(0)), abs(d(-1)), & - abs(4._wp*d(0) - d(-1)), abs(d(0)))/8._wp - - d_LC = (sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, 4._wp*d(1) - d(0))) & - *abs((sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(0))) & - *(sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(1)))) & - *min(abs(4._wp*d(0) - d(1)), abs(d(0)), & - abs(4._wp*d(1) - d(0)), abs(d(1)))/8._wp - - vL_UL = v_rs_ws(j, k, l, i) & - - (v_rs_ws(j + 1, k, l, i) & - - v_rs_ws(j, k, l, i))*alpha_mp - - vL_MD = (v_rs_ws(j, k, l, i) & - + v_rs_ws(j - 1, k, l, i) & - - d_MD)*5.e-1_wp - - vL_LC = v_rs_ws(j, k, l, i) & - - (v_rs_ws(j + 1, k, l, i) & - - v_rs_ws(j, k, l, i))*5.e-1_wp + beta_mp*d_LC - - vL_min = max(min(v_rs_ws(j, k, l, i), & - v_rs_ws(j - 1, k, l, i), & - vL_MD), & - min(v_rs_ws(j, k, l, i), & - vL_UL, & - vL_LC)) - - vL_max = min(max(v_rs_ws(j, k, l, i), & - v_rs_ws(j - 1, k, l, i), & - vL_MD), & - max(v_rs_ws(j, k, l, i), & - vL_UL, & - vL_LC)) - - vL_rs_vf(j, k, l, i) = vL_rs_vf(j, k, l, i) & - + (sign(5.e-1_wp, vL_min - vL_rs_vf(j, k, l, i)) & - + sign(5.e-1_wp, vL_max - vL_rs_vf(j, k, l, i))) & - *min(abs(vL_min - vL_rs_vf(j, k, l, i)), & - abs(vL_max - vL_rs_vf(j, k, l, i))) - ! END: Left Monotonicity Preserving Bound - - ! Right Monotonicity Preserving Bound - d(-1) = v_rs_ws(j, k, l, i) & - + v_rs_ws(j - 2, k, l, i) & - - v_rs_ws(j - 1, k, l, i) & - *2._wp - d(0) = v_rs_ws(j + 1, k, l, i) & - + v_rs_ws(j - 1, k, l, i) & - - v_rs_ws(j, k, l, i) & - *2._wp - d(1) = v_rs_ws(j + 2, k, l, i) & - + v_rs_ws(j, k, l, i) & - - v_rs_ws(j + 1, k, l, i) & - *2._wp - - d_MD = (sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, 4._wp*d(1) - d(0))) & - *abs((sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(0))) & - *(sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(1)))) & - *min(abs(4._wp*d(0) - d(1)), abs(d(0)), & - abs(4._wp*d(1) - d(0)), abs(d(1)))/8._wp - - d_LC = (sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, 4._wp*d(0) - d(-1))) & - *abs((sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(-1))) & - *(sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(0)))) & - *min(abs(4._wp*d(-1) - d(0)), abs(d(-1)), & - abs(4._wp*d(0) - d(-1)), abs(d(0)))/8._wp - - vR_UL = v_rs_ws(j, k, l, i) & - + (v_rs_ws(j, k, l, i) & - - v_rs_ws(j - 1, k, l, i))*alpha_mp - - vR_MD = (v_rs_ws(j, k, l, i) & - + v_rs_ws(j + 1, k, l, i) & - - d_MD)*5.e-1_wp - - vR_LC = v_rs_ws(j, k, l, i) & - + (v_rs_ws(j, k, l, i) & - - v_rs_ws(j - 1, k, l, i))*5.e-1_wp + beta_mp*d_LC - - vR_min = max(min(v_rs_ws(j, k, l, i), & - v_rs_ws(j + 1, k, l, i), & - vR_MD), & - min(v_rs_ws(j, k, l, i), & - vR_UL, & - vR_LC)) - - vR_max = min(max(v_rs_ws(j, k, l, i), & - v_rs_ws(j + 1, k, l, i), & - vR_MD), & - max(v_rs_ws(j, k, l, i), & - vR_UL, & - vR_LC)) - - vR_rs_vf(j, k, l, i) = vR_rs_vf(j, k, l, i) & - + (sign(5.e-1_wp, vR_min - vR_rs_vf(j, k, l, i)) & - + sign(5.e-1_wp, vR_max - vR_rs_vf(j, k, l, i))) & - *min(abs(vR_min - vR_rs_vf(j, k, l, i)), & - abs(vR_max - vR_rs_vf(j, k, l, i))) - ! END: Right Monotonicity Preserving Bound - end do + $:GPU_PARALLEL_LOOP(private='[i,j,k,l,d]', collapse=4) + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + do i = 1, v_size + d(-1) = v_rs_ws(j, k, l, i) & + + v_rs_ws(j - 2, k, l, i) & + - v_rs_ws(j - 1, k, l, i) & + *2._wp + d(0) = v_rs_ws(j + 1, k, l, i) & + + v_rs_ws(j - 1, k, l, i) & + - v_rs_ws(j, k, l, i) & + *2._wp + d(1) = v_rs_ws(j + 2, k, l, i) & + + v_rs_ws(j, k, l, i) & + - v_rs_ws(j + 1, k, l, i) & + *2._wp + + d_MD = (sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, 4._wp*d(0) - d(-1))) & + *abs((sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(-1))) & + *(sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(0)))) & + *min(abs(4._wp*d(-1) - d(0)), abs(d(-1)), & + abs(4._wp*d(0) - d(-1)), abs(d(0)))/8._wp + + d_LC = (sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, 4._wp*d(1) - d(0))) & + *abs((sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(0))) & + *(sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(1)))) & + *min(abs(4._wp*d(0) - d(1)), abs(d(0)), & + abs(4._wp*d(1) - d(0)), abs(d(1)))/8._wp + + vL_UL = v_rs_ws(j, k, l, i) & + - (v_rs_ws(j + 1, k, l, i) & + - v_rs_ws(j, k, l, i))*alpha_mp + + vL_MD = (v_rs_ws(j, k, l, i) & + + v_rs_ws(j - 1, k, l, i) & + - d_MD)*5.e-1_wp + + vL_LC = v_rs_ws(j, k, l, i) & + - (v_rs_ws(j + 1, k, l, i) & + - v_rs_ws(j, k, l, i))*5.e-1_wp + beta_mp*d_LC + + vL_min = max(min(v_rs_ws(j, k, l, i), & + v_rs_ws(j - 1, k, l, i), & + vL_MD), & + min(v_rs_ws(j, k, l, i), & + vL_UL, & + vL_LC)) + + vL_max = min(max(v_rs_ws(j, k, l, i), & + v_rs_ws(j - 1, k, l, i), & + vL_MD), & + max(v_rs_ws(j, k, l, i), & + vL_UL, & + vL_LC)) + + vL_rs_vf(j, k, l, i) = vL_rs_vf(j, k, l, i) & + + (sign(5.e-1_wp, vL_min - vL_rs_vf(j, k, l, i)) & + + sign(5.e-1_wp, vL_max - vL_rs_vf(j, k, l, i))) & + *min(abs(vL_min - vL_rs_vf(j, k, l, i)), & + abs(vL_max - vL_rs_vf(j, k, l, i))) + ! END: Left Monotonicity Preserving Bound + + ! Right Monotonicity Preserving Bound + d(-1) = v_rs_ws(j, k, l, i) & + + v_rs_ws(j - 2, k, l, i) & + - v_rs_ws(j - 1, k, l, i) & + *2._wp + d(0) = v_rs_ws(j + 1, k, l, i) & + + v_rs_ws(j - 1, k, l, i) & + - v_rs_ws(j, k, l, i) & + *2._wp + d(1) = v_rs_ws(j + 2, k, l, i) & + + v_rs_ws(j, k, l, i) & + - v_rs_ws(j + 1, k, l, i) & + *2._wp + + d_MD = (sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, 4._wp*d(1) - d(0))) & + *abs((sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(0))) & + *(sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(1)))) & + *min(abs(4._wp*d(0) - d(1)), abs(d(0)), & + abs(4._wp*d(1) - d(0)), abs(d(1)))/8._wp + + d_LC = (sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, 4._wp*d(0) - d(-1))) & + *abs((sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(-1))) & + *(sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(0)))) & + *min(abs(4._wp*d(-1) - d(0)), abs(d(-1)), & + abs(4._wp*d(0) - d(-1)), abs(d(0)))/8._wp + + vR_UL = v_rs_ws(j, k, l, i) & + + (v_rs_ws(j, k, l, i) & + - v_rs_ws(j - 1, k, l, i))*alpha_mp + + vR_MD = (v_rs_ws(j, k, l, i) & + + v_rs_ws(j + 1, k, l, i) & + - d_MD)*5.e-1_wp + + vR_LC = v_rs_ws(j, k, l, i) & + + (v_rs_ws(j, k, l, i) & + - v_rs_ws(j - 1, k, l, i))*5.e-1_wp + beta_mp*d_LC + + vR_min = max(min(v_rs_ws(j, k, l, i), & + v_rs_ws(j + 1, k, l, i), & + vR_MD), & + min(v_rs_ws(j, k, l, i), & + vR_UL, & + vR_LC)) + + vR_max = min(max(v_rs_ws(j, k, l, i), & + v_rs_ws(j + 1, k, l, i), & + vR_MD), & + max(v_rs_ws(j, k, l, i), & + vR_UL, & + vR_LC)) + + vR_rs_vf(j, k, l, i) = vR_rs_vf(j, k, l, i) & + + (sign(5.e-1_wp, vR_min - vR_rs_vf(j, k, l, i)) & + + sign(5.e-1_wp, vR_max - vR_rs_vf(j, k, l, i))) & + *min(abs(vR_min - vR_rs_vf(j, k, l, i)), & + abs(vR_max - vR_rs_vf(j, k, l, i))) + ! END: Right Monotonicity Preserving Bound end do end do end do - #:endcall GPU_PARALLEL_LOOP + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_preserve_monotonicity