diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index e146c15af4..4087a0045e 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -17,12 +17,14 @@ module m_boundary_common use m_constants + use m_delay_file_access + + use m_compile_specific + implicit none type(scalar_field), dimension(:, :), allocatable :: bc_buffers - !$acc declare create(bc_buffers) - - real(wp) :: bcxb, bcxe, bcyb, bcye, bczb, bcze +!$acc declare create(bc_buffers) #ifdef MFC_MPI integer, dimension(1:3, -1:1) :: MPI_BC_TYPE_TYPE, MPI_BC_BUFFER_TYPE @@ -32,9 +34,15 @@ module m_boundary_common s_populate_variables_buffers, & s_create_mpi_types, & s_populate_capillary_buffers, & + s_write_serial_boundary_condition_files, & + s_write_parallel_boundary_condition_files, & + s_read_serial_boundary_condition_files, & + s_read_parallel_boundary_condition_files, & + s_assign_default_bc_type, & + s_populate_grid_variables_buffers, & s_finalize_boundary_common_module - public :: bc_buffers, bcxb, bcxe, bcyb, bcye, bczb, bcze + public :: bc_buffers #ifdef MFC_MPI public :: MPI_BC_TYPE_TYPE, MPI_BC_BUFFER_TYPE @@ -44,11 +52,8 @@ contains impure subroutine s_initialize_boundary_common_module() - bcxb = bc_x%beg; bcxe = bc_x%end; bcyb = bc_y%beg; bcye = bc_y%end; bczb = bc_z%beg; bcze = bc_z%end - @:ALLOCATE(bc_buffers(1:num_dims, -1:1)) -#ifndef MFC_POST_PROCESS if (bc_io) then @:ALLOCATE(bc_buffers(1, -1)%sf(1:sys_size, 0:n, 0:p)) @:ALLOCATE(bc_buffers(1, 1)%sf(1:sys_size, 0:n, 0:p)) @@ -64,66 +69,75 @@ contains end if end if end if -#endif end subroutine s_initialize_boundary_common_module !> The purpose of this procedure is to populate the buffers !! of the primitive variables, depending on the selected !! boundary conditions. - impure subroutine s_populate_variables_buffers(q_prim_vf, pb, mv, bc_type) + impure subroutine s_populate_variables_buffers(bc_type, q_prim_vf, pb, mv) type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv type(integer_field), dimension(1:num_dims, -1:1), intent(in) :: bc_type integer :: k, l ! Population of Buffers in x-direction - if (bcxb >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 1, -1) + if (bc_x%beg >= 0) then + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, -1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p do k = 0, n select case (int(bc_type(1, -1)%sf(0, k, l))) case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 1, -1, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, 1, -1, k, l) case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, pb, mv, 1, -1, k, l) + call s_symmetry(q_prim_vf, 1, -1, k, l, pb, mv) case (BC_PERIODIC) - call s_periodic(q_prim_vf, pb, mv, 1, -1, k, l) + call s_periodic(q_prim_vf, 1, -1, k, l, pb, mv) case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, pb, mv, 1, -1, k, l) + call s_slip_wall(q_prim_vf, 1, -1, k, l) case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, pb, mv, 1, -1, k, l) + call s_no_slip_wall(q_prim_vf, 1, -1, k, l) case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, pb, mv, 1, -1, k, l) + call s_dirichlet(q_prim_vf, 1, -1, k, l) end select + + if (qbmm .and. (.not. polytropic) .and. 
& + (bc_type(1, -1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(1, -1, k, l, pb, mv) + end if end do end do end if - if (bcxe >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 1, 1) + if (bc_x%end >= 0) then + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, 1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p do k = 0, n select case (int(bc_type(1, 1)%sf(0, k, l))) case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) ! Ghost-cell extrap. BC at end - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 1, 1, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, 1, 1, k, l) case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, pb, mv, 1, 1, k, l) + call s_symmetry(q_prim_vf, 1, 1, k, l, pb, mv) case (BC_PERIODIC) - call s_periodic(q_prim_vf, pb, mv, 1, 1, k, l) + call s_periodic(q_prim_vf, 1, 1, k, l, pb, mv) case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, pb, mv, 1, 1, k, l) + call s_slip_wall(q_prim_vf, 1, 1, k, l) case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, pb, mv, 1, 1, k, l) + call s_no_slip_wall(q_prim_vf, 1, 1, k, l) case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, pb, mv, 1, 1, k, l) + call s_dirichlet(q_prim_vf, 1, 1, k, l) end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(1, 1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(1, 1, k, l, pb, mv) + end if end do end do end if @@ -132,52 +146,63 @@ contains if (n == 0) return - if (bcyb >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 2, -1) + if (bc_y%beg >= 0) then + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, -1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p do k = -buff_size, m + buff_size select case (int(bc_type(2, -1)%sf(k, 0, l))) case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 2, -1, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, 2, -1, k, l) case (BC_AXIS) call s_axis(q_prim_vf, pb, mv, k, l) case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, pb, mv, 2, -1, k, l) + call s_symmetry(q_prim_vf, 2, -1, k, l, pb, mv) case (BC_PERIODIC) - call s_periodic(q_prim_vf, pb, mv, 2, -1, k, l) + call s_periodic(q_prim_vf, 2, -1, k, l, pb, mv) case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, pb, mv, 2, -1, k, l) + call s_slip_wall(q_prim_vf, 2, -1, k, l) case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, pb, mv, 2, -1, k, l) + call s_no_slip_wall(q_prim_vf, 2, -1, k, l) case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, pb, mv, 2, -1, k, l) + call s_dirichlet(q_prim_vf, 2, -1, k, l) end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(2, -1)%sf(k, 0, l) <= BC_GHOST_EXTRAP) .and. 
& + (bc_type(2, -1)%sf(k, 0, l) /= BC_AXIS)) then + call s_qbmm_extrapolation(2, -1, k, l, pb, mv) + end if end do end do end if - if (bcye >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 2, 1) + if (bc_y%end >= 0) then + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, 1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p do k = -buff_size, m + buff_size select case (int(bc_type(2, 1)%sf(k, 0, l))) case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 2, 1, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, 2, 1, k, l) case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, pb, mv, 2, 1, k, l) + call s_symmetry(q_prim_vf, 2, 1, k, l, pb, mv) case (BC_PERIODIC) - call s_periodic(q_prim_vf, pb, mv, 2, 1, k, l) + call s_periodic(q_prim_vf, 2, 1, k, l, pb, mv) case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, pb, mv, 2, 1, k, l) + call s_slip_wall(q_prim_vf, 2, 1, k, l) case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, pb, mv, 2, 1, k, l) + call s_no_slip_wall(q_prim_vf, 2, 1, k, l) case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, pb, mv, 2, 1, k, l) + call s_dirichlet(q_prim_vf, 2, 1, k, l) end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(2, 1)%sf(k, 0, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(2, 1, k, l, pb, mv) + end if end do end do end if @@ -186,50 +211,60 @@ contains if (p == 0) return - if (bczb >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 3, -1) + if (bc_z%beg >= 0) then + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, -1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (int(bc_type(3, -1)%sf(k, l, 0))) case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 3, -1, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, 3, -1, k, l) case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, pb, mv, 3, -1, k, l) + call s_symmetry(q_prim_vf, 3, -1, k, l, pb, mv) case (BC_PERIODIC) - call s_periodic(q_prim_vf, pb, mv, 3, -1, k, l) + call s_periodic(q_prim_vf, 3, -1, k, l, pb, mv) case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, pb, mv, 3, -1, k, l) + call s_slip_wall(q_prim_vf, 3, -1, k, l) case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, pb, mv, 3, -1, k, l) + call s_no_slip_wall(q_prim_vf, 3, -1, k, l) case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, pb, mv, 3, -1, k, l) + call s_dirichlet(q_prim_vf, 3, -1, k, l) end select + + if (qbmm .and. (.not. polytropic) .and. 
& + (bc_type(3, -1)%sf(k, l, 0) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(3, -1, k, l, pb, mv) + end if end do end do end if - if (bcze >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 3, 1) + if (bc_z%end >= 0) then + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, 1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (int(bc_type(3, 1)%sf(k, l, 0))) case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 3, 1, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, 3, 1, k, l) case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, pb, mv, 3, 1, k, l) + call s_symmetry(q_prim_vf, 3, 1, k, l, pb, mv) case (BC_PERIODIC) - call s_periodic(q_prim_vf, pb, mv, 3, 1, k, l) + call s_periodic(q_prim_vf, 3, 1, k, l, pb, mv) case (BC_SlIP_WALL) - call s_slip_wall(q_prim_vf, pb, mv, 3, 1, k, l) + call s_slip_wall(q_prim_vf, 3, 1, k, l) case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, pb, mv, 3, 1, k, l) + call s_no_slip_wall(q_prim_vf, 3, 1, k, l) case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, pb, mv, 3, 1, k, l) + call s_dirichlet(q_prim_vf, 3, 1, k, l) end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(3, 1)%sf(k, l, 0) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(3, 1, k, l, pb, mv) + end if end do end do end if @@ -237,23 +272,18 @@ contains end subroutine s_populate_variables_buffers - pure subroutine s_ghost_cell_extrapolation(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) + pure subroutine s_ghost_cell_extrapolation(q_prim_vf, bc_dir, bc_loc, k, l) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_ghost_cell_extrapolation #else !$acc routine seq #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l integer :: j, i - if (qbmm .and. .not. 
polytropic) then - call s_qbmm_extrapolation(pb, mv, bc_dir, bc_loc, k, l) - end if - if (bc_dir == 1) then !< x-direction if (bc_loc == -1) then !bc_x%beg do i = 1, sys_size @@ -306,14 +336,10 @@ contains end subroutine s_ghost_cell_extrapolation - pure subroutine s_symmetry(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_symmetry -#else + pure subroutine s_symmetry(q_prim_vf, bc_dir, bc_loc, k, l, pb, mv) !$acc routine seq -#endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l @@ -570,14 +596,10 @@ contains end subroutine s_symmetry - pure subroutine s_periodic(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_periodic -#else + pure subroutine s_periodic(q_prim_vf, bc_dir, bc_loc, k, l, pb, mv) !$acc routine seq -#endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l @@ -714,11 +736,7 @@ contains end subroutine s_periodic pure subroutine s_axis(q_prim_vf, pb, mv, k, l) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_axis -#else !$acc routine seq -#endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: k, l @@ -776,23 +794,18 @@ contains end subroutine s_axis - pure subroutine s_slip_wall(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) + pure subroutine s_slip_wall(q_prim_vf, bc_dir, bc_loc, k, l) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_slip_wall #else !$acc routine seq #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l integer :: j, i - if (qbmm .and. .not. polytropic) then - call s_qbmm_extrapolation(pb, mv, bc_dir, bc_loc, k, l) - end if - if (bc_dir == 1) then !< x-direction if (bc_loc == -1) then !< bc_x%beg do i = 1, sys_size @@ -875,23 +888,18 @@ contains end subroutine s_slip_wall - pure subroutine s_no_slip_wall(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) + pure subroutine s_no_slip_wall(q_prim_vf, bc_dir, bc_loc, k, l) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_no_slip_wall #else !$acc routine seq #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l integer :: j, i - if (qbmm .and. .not. 
polytropic) then - call s_qbmm_extrapolation(pb, mv, bc_dir, bc_loc, k, l) - end if - if (bc_dir == 1) then !< x-direction if (bc_loc == -1) then !< bc_x%beg do i = 1, sys_size @@ -1010,22 +1018,19 @@ contains end subroutine s_no_slip_wall - pure subroutine s_dirichlet(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) + pure subroutine s_dirichlet(q_prim_vf, bc_dir, bc_loc, k, l) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_dirichlet #else !$acc routine seq #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l integer :: j, i -#ifdef MFC_PRE_PROCESS - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 1, -1, k, l) -#else +#ifdef MFC_SIMULATION if (bc_dir == 1) then !< x-direction if (bc_loc == -1) then !bc_x%beg do i = 1, sys_size @@ -1075,17 +1080,15 @@ contains end do end if end if +#else + call s_ghost_cell_extrapolation(q_prim_vf, bc_dir, bc_loc, k, l) #endif end subroutine s_dirichlet - pure subroutine s_qbmm_extrapolation(pb, mv, bc_dir, bc_loc, k, l) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_qbmm_extrapolation -#else + pure subroutine s_qbmm_extrapolation(bc_dir, bc_loc, k, l, pb, mv) !$acc routine seq -#endif - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l @@ -1163,8 +1166,8 @@ contains integer :: k, l !< x-direction - if (bcxb >= 0) then - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 1, -1) + if (bc_x%beg >= 0) then + call s_mpi_sendrecv_variables_buffers(c_divs, 1, -1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p @@ -1181,8 +1184,8 @@ contains end do end if - if (bcxe >= 0) then - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 1, 1) + if (bc_x%end >= 0) then + call s_mpi_sendrecv_variables_buffers(c_divs, 1, 1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p @@ -1202,8 +1205,8 @@ contains if (n == 0) return !< y-direction - if (bcyb >= 0) then - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 2, -1) + if (bc_y%beg >= 0) then + call s_mpi_sendrecv_variables_buffers(c_divs, 2, -1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p @@ -1220,8 +1223,8 @@ contains end do end if - if (bcye >= 0) then - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 2, 1) + if (bc_y%end >= 0) then + call s_mpi_sendrecv_variables_buffers(c_divs, 2, 1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p @@ -1241,8 +1244,8 @@ contains if (p == 0) return !< z-direction - if (bczb >= 0) then - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 3, -1) + if (bc_z%beg >= 0) then + call s_mpi_sendrecv_variables_buffers(c_divs, 3, -1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) do l = -buff_size, n + buff_size @@ -1259,8 +1262,8 @@ contains end do end if - if (bcze >= 0) then - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 3, 1) + if (bc_z%end >= 0) then + call s_mpi_sendrecv_variables_buffers(c_divs, 3, 1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) do l = -buff_size, n + buff_size @@ -1509,9 +1512,469 @@ contains #endif 
end subroutine s_create_mpi_types - impure subroutine s_finalize_boundary_common_module() + subroutine s_write_serial_boundary_condition_files(q_prim_vf, bc_type, step_dirpath, old_grid) + + type(scalar_field), dimension(sys_size) :: q_prim_vf + type(integer_field), dimension(1:num_dims, -1:1) :: bc_type + logical :: old_grid + + character(LEN=*), intent(in) :: step_dirpath + + integer :: dir, loc, i + character(len=path_len) :: file_path + + character(len=10) :: status + + if (old_grid) then + status = 'old' + else + status = 'new' + end if + + call s_pack_boundary_condition_buffers(q_prim_vf) + + file_path = trim(step_dirpath)//'/bc_type.dat' + open (1, FILE=trim(file_path), FORM='unformatted', STATUS=status) + do dir = 1, num_dims + do loc = -1, 1, 2 + write (1) bc_type(dir, loc)%sf + end do + end do + close (1) + + file_path = trim(step_dirpath)//'/bc_buffers.dat' + open (1, FILE=trim(file_path), FORM='unformatted', STATUS=status) + do dir = 1, num_dims + do loc = -1, 1, 2 + write (1) bc_buffers(dir, loc)%sf + end do + end do + close (1) + + end subroutine s_write_serial_boundary_condition_files + + subroutine s_write_parallel_boundary_condition_files(q_prim_vf, bc_type) + + type(scalar_field), dimension(sys_size) :: q_prim_vf + type(integer_field), dimension(1:num_dims, -1:1) :: bc_type + + integer :: dir, loc + character(len=path_len) :: file_loc, file_path + + character(len=10) :: status + +#ifdef MFC_MPI + integer :: ierr + integer :: file_id + integer :: offset + character(len=7) :: proc_rank_str + logical :: dir_check + + call s_pack_boundary_condition_buffers(q_prim_vf) + + file_loc = trim(case_dir)//'/restart_data/boundary_conditions' + if (proc_rank == 0) then + call my_inquire(file_loc, dir_check) + if (dir_check .neqv. .true.) then + call s_create_directory(trim(file_loc)) + end if + end if + + call s_create_mpi_types(bc_type) + + call s_mpi_barrier() + + call DelayFileAccess(proc_rank) + + write (proc_rank_str, '(I7.7)') proc_rank + file_path = trim(file_loc)//'/bc_'//trim(proc_rank_str)//'.dat' + call MPI_File_open(MPI_COMM_SELF, trim(file_path), MPI_MODE_CREATE + MPI_MODE_WRONLY, MPI_INFO_NULL, file_id, ierr) + + offset = 0 + + ! Write bc_types + do dir = 1, num_dims + do loc = -1, 1, 2 + call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), MPI_INTEGER, MPI_BC_TYPE_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) + call MPI_File_write_all(file_id, bc_type(dir, loc)%sf, 1, MPI_BC_TYPE_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) + offset = offset + sizeof(bc_type(dir, loc)%sf) + end do + end do + + ! Write bc_buffers + do dir = 1, num_dims + do loc = -1, 1, 2 + call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_p, MPI_BC_BUFFER_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) + call MPI_File_write_all(file_id, bc_buffers(dir, loc)%sf, 1, MPI_BC_BUFFER_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) + offset = offset + sizeof(bc_buffers(dir, loc)%sf) + end do + end do + + call MPI_File_close(file_id, ierr) +#endif + + end subroutine s_write_parallel_boundary_condition_files + + subroutine s_read_serial_boundary_condition_files(step_dirpath, bc_type) + + character(LEN=*), intent(in) :: step_dirpath + + type(integer_field), dimension(1:num_dims, -1:1), intent(inout) :: bc_type + + integer :: dir, loc + logical :: file_exist + character(len=path_len) :: file_path + + character(len=10) :: status + + ! Read bc_types + file_path = trim(step_dirpath)//'/bc_type.dat' + inquire (FILE=trim(file_path), EXIST=file_exist) + if (.not. 
file_exist) then + call s_mpi_abort(trim(file_path)//' is missing. Exiting.') + end if + + open (1, FILE=trim(file_path), FORM='unformatted', STATUS='unknown') + do dir = 1, num_dims + do loc = -1, 1, 2 + read (1) bc_type(dir, loc)%sf + !$acc update device(bc_type(dir, loc)%sf) + end do + end do + close (1) + + ! Read bc_buffers + file_path = trim(step_dirpath)//'/bc_buffers.dat' + inquire (FILE=trim(file_path), EXIST=file_exist) + if (.not. file_exist) then + call s_mpi_abort(trim(file_path)//' is missing. Exiting.') + end if + + open (1, FILE=trim(file_path), FORM='unformatted', STATUS='unknown') + do dir = 1, num_dims + do loc = -1, 1, 2 + read (1) bc_buffers(dir, loc)%sf + !$acc update device(bc_buffers(dir, loc)%sf) + end do + end do + close (1) + + end subroutine s_read_serial_boundary_condition_files + + subroutine s_read_parallel_boundary_condition_files(bc_type) + + type(integer_field), dimension(1:num_dims, -1:1), intent(inout) :: bc_type + + integer :: dir, loc + character(len=path_len) :: file_loc, file_path + + character(len=10) :: status + +#ifdef MFC_MPI + integer :: ierr + integer :: file_id + integer :: offset + character(len=7) :: proc_rank_str + logical :: dir_check + + file_loc = trim(case_dir)//'/restart_data/boundary_conditions' + + if (proc_rank == 0) then + call my_inquire(file_loc, dir_check) + if (dir_check .neqv. .true.) then + call s_mpi_abort(trim(file_loc)//' is missing. Exiting.') + end if + end if + + call s_create_mpi_types(bc_type) + + call s_mpi_barrier() + + call DelayFileAccess(proc_rank) + + write (proc_rank_str, '(I7.7)') proc_rank + file_path = trim(file_loc)//'/bc_'//trim(proc_rank_str)//'.dat' + call MPI_File_open(MPI_COMM_SELF, trim(file_path), MPI_MODE_RDONLY, MPI_INFO_NULL, file_id, ierr) + + offset = 0 + + ! Read bc_types + do dir = 1, num_dims + do loc = -1, 1, 2 + call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), MPI_INTEGER, MPI_BC_TYPE_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) + call MPI_File_read_all(file_id, bc_type(dir, loc)%sf, 1, MPI_BC_TYPE_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) + offset = offset + sizeof(bc_type(dir, loc)%sf) + !$acc update device(bc_type(dir, loc)%sf) + end do + end do + + ! 
Read bc_buffers + do dir = 1, num_dims + do loc = -1, 1, 2 + call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_p, MPI_BC_BUFFER_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) + call MPI_File_read_all(file_id, bc_buffers(dir, loc)%sf, 1, MPI_BC_BUFFER_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) + offset = offset + sizeof(bc_buffers(dir, loc)%sf) + !$acc update device(bc_buffers(dir, loc)%sf) + end do + end do + + call MPI_File_close(file_id, ierr) +#endif + + end subroutine s_read_parallel_boundary_condition_files + + subroutine s_pack_boundary_condition_buffers(q_prim_vf) + + type(scalar_field), dimension(sys_size) :: q_prim_vf + integer :: i, j, k + + do k = 0, p + do j = 0, n + do i = 1, sys_size + bc_buffers(1, -1)%sf(i, j, k) = q_prim_vf(i)%sf(0, j, k) + bc_buffers(1, 1)%sf(i, j, k) = q_prim_vf(i)%sf(m, j, k) + end do + end do + end do + + if (n > 0) then + do k = 0, p + do j = 1, sys_size + do i = 0, m + bc_buffers(2, -1)%sf(i, j, k) = q_prim_vf(j)%sf(i, 0, k) + bc_buffers(2, 1)%sf(i, j, k) = q_prim_vf(j)%sf(i, n, k) + end do + end do + end do + + if (p > 0) then + do k = 1, sys_size + do j = 0, n + do i = 0, m + bc_buffers(3, -1)%sf(i, j, k) = q_prim_vf(k)%sf(i, j, 0) + bc_buffers(3, 1)%sf(i, j, k) = q_prim_vf(k)%sf(i, j, p) + end do + end do + end do + end if + end if + + end subroutine s_pack_boundary_condition_buffers + + subroutine s_assign_default_bc_type(bc_type) + + type(integer_field), dimension(1:num_dims, -1:1), intent(in) :: bc_type + + bc_type(1, -1)%sf(:, :, :) = bc_x%beg + bc_type(1, 1)%sf(:, :, :) = bc_x%end + !$acc update device(bc_type(1,-1)%sf, bc_type(1,1)%sf) + + if (n > 0) then + bc_type(2, -1)%sf(:, :, :) = bc_y%beg + bc_type(2, 1)%sf(:, :, :) = bc_y%end + !$acc update device(bc_type(2,-1)%sf, bc_type(2,1)%sf) + + if (p > 0) then + bc_type(3, -1)%sf(:, :, :) = bc_z%beg + bc_type(3, 1)%sf(:, :, :) = bc_z%end + !$acc update device(bc_type(3,-1)%sf, bc_type(3,1)%sf) + end if + end if + + end subroutine s_assign_default_bc_type + + !> The purpose of this subroutine is to populate the buffers + !! of the grid variables, which are constituted of the cell- + !! boundary locations and cell-width distributions, based on + !! the boundary conditions. + subroutine s_populate_grid_variables_buffers + + integer :: i !< Generic loop iterator + +#ifdef MFC_SIMULATION + ! Required for compatibility between codes + type(int_bounds_info) :: offset_x, offset_y, offset_z + offset_x%beg = buff_size; offset_x%end = buff_size + offset_y%beg = buff_size; offset_y%end = buff_size + offset_z%beg = buff_size; offset_z%end = buff_size +#endif + +#ifndef MFC_PRE_PROCESS + ! Population of Buffers in x-direction + + ! Populating cell-width distribution buffer at bc_x%beg + if (bc_x%beg >= 0) then + call s_mpi_sendrecv_grid_variables_buffers(1, -1) + elseif (bc_x%beg <= BC_GHOST_EXTRAP) then + do i = 1, buff_size + dx(-i) = dx(0) + end do + elseif (bc_x%beg == BC_REFLECTIVE) then + do i = 1, buff_size + dx(-i) = dx(i - 1) + end do + elseif (bc_x%beg == BC_PERIODIC) then + do i = 1, buff_size + dx(-i) = dx(m - (i - 1)) + end do + end if + + ! Computing the cell-boundary and center locations buffer at bc_x%beg + do i = 1, offset_x%beg + x_cb(-1 - i) = x_cb(-i) - dx(-i) + end do + + do i = 1, buff_size + x_cc(-i) = x_cc(1 - i) - (dx(1 - i) + dx(-i))/2._wp + end do + + ! 
Populating the cell-width distribution buffer at bc_x%end + if (bc_x%end >= 0) then + call s_mpi_sendrecv_grid_variables_buffers(1, 1) + elseif (bc_x%end <= BC_GHOST_EXTRAP) then + do i = 1, buff_size + dx(m + i) = dx(m) + end do + elseif (bc_x%end == BC_REFLECTIVE) then + do i = 1, buff_size + dx(m + i) = dx(m - (i - 1)) + end do + elseif (bc_x%end == BC_PERIODIC) then + do i = 1, buff_size + dx(m + i) = dx(i - 1) + end do + end if + + ! Populating the cell-boundary and center locations buffer at bc_x%end + do i = 1, offset_x%end + x_cb(m + i) = x_cb(m + (i - 1)) + dx(m + i) + end do + + do i = 1, buff_size + x_cc(m + i) = x_cc(m + (i - 1)) + (dx(m + (i - 1)) + dx(m + i))/2._wp + end do + ! END: Population of Buffers in x-direction + + ! Population of Buffers in y-direction + + ! Populating cell-width distribution buffer at bc_y%beg + if (n == 0) then + return + elseif (bc_y%beg >= 0) then + call s_mpi_sendrecv_grid_variables_buffers(2, -1) + elseif (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_AXIS) then + do i = 1, buff_size + dy(-i) = dy(0) + end do + elseif (bc_y%beg == BC_REFLECTIVE .or. bc_y%beg == BC_AXIS) then + do i = 1, buff_size + dy(-i) = dy(i - 1) + end do + elseif (bc_y%beg == BC_PERIODIC) then + do i = 1, buff_size + dy(-i) = dy(n - (i - 1)) + end do + end if + + ! Computing the cell-boundary and center locations buffer at bc_y%beg + do i = 1, offset_y%beg + y_cb(-1 - i) = y_cb(-i) - dy(-i) + end do + + do i = 1, buff_size + y_cc(-i) = y_cc(1 - i) - (dy(1 - i) + dy(-i))/2._wp + end do + + ! Populating the cell-width distribution buffer at bc_y%end + if (bc_y%end >= 0) then + call s_mpi_sendrecv_grid_variables_buffers(2, 1) + elseif (bc_y%end <= BC_GHOST_EXTRAP) then + do i = 1, buff_size + dy(n + i) = dy(n) + end do + elseif (bc_y%end == BC_REFLECTIVE) then + do i = 1, buff_size + dy(n + i) = dy(n - (i - 1)) + end do + elseif (bc_y%end == BC_PERIODIC) then + do i = 1, buff_size + dy(n + i) = dy(i - 1) + end do + end if + + ! Populating the cell-boundary and center locations buffer at bc_y%end + do i = 1, offset_y%end + y_cb(n + i) = y_cb(n + (i - 1)) + dy(n + i) + end do + + do i = 1, buff_size + y_cc(n + i) = y_cc(n + (i - 1)) + (dy(n + (i - 1)) + dy(n + i))/2._wp + end do + ! END: Population of Buffers in y-direction + + ! Population of Buffers in z-direction + + ! Populating cell-width distribution buffer at bc_z%beg + if (p == 0) then + return + elseif (Bc_z%beg >= 0) then + call s_mpi_sendrecv_grid_variables_buffers(3, -1) + elseif (bc_z%beg <= BC_GHOST_EXTRAP) then + do i = 1, buff_size + dz(-i) = dz(0) + end do + elseif (bc_z%beg == BC_REFLECTIVE) then + do i = 1, buff_size + dz(-i) = dz(i - 1) + end do + elseif (bc_z%beg == BC_PERIODIC) then + do i = 1, buff_size + dz(-i) = dz(p - (i - 1)) + end do + end if + + ! Computing the cell-boundary and center locations buffer at bc_z%beg + do i = 1, offset_z%beg + z_cb(-1 - i) = z_cb(-i) - dz(-i) + end do + + do i = 1, buff_size + z_cc(-i) = z_cc(1 - i) - (dz(1 - i) + dz(-i))/2._wp + end do + + ! Populating the cell-width distribution buffer at bc_z%end + if (bc_z%end >= 0) then + call s_mpi_sendrecv_grid_variables_buffers(3, 1) + elseif (bc_z%end <= BC_GHOST_EXTRAP) then + do i = 1, buff_size + dz(p + i) = dz(p) + end do + elseif (bc_z%end == BC_REFLECTIVE) then + do i = 1, buff_size + dz(p + i) = dz(p - (i - 1)) + end do + elseif (bc_z%end == BC_PERIODIC) then + do i = 1, buff_size + dz(p + i) = dz(i - 1) + end do + end if + + ! 
Populating the cell-boundary and center locations buffer at bc_z%end + do i = 1, offset_z%end + z_cb(p + i) = z_cb(p + (i - 1)) + dz(p + i) + end do + + do i = 1, buff_size + z_cc(p + i) = z_cc(p + (i - 1)) + (dz(p + (i - 1)) + dz(p + i))/2._wp + end do + ! END: Population of Buffers in z-direction + +#endif + + end subroutine s_populate_grid_variables_buffers + + subroutine s_finalize_boundary_common_module() -#ifndef MFC_POST_PROCESS if (bc_io) then deallocate (bc_buffers(1, -1)%sf) deallocate (bc_buffers(1, 1)%sf) @@ -1524,7 +1987,7 @@ contains end if end if end if -#endif + deallocate (bc_buffers) end subroutine s_finalize_boundary_common_module diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index b439ce1c6d..4a96787e15 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -24,24 +24,24 @@ module m_mpi_common implicit none - integer, private :: err_code, ierr, v_size !< + integer, private :: ierr, v_size !< !$acc declare create(v_size) !! Generic flags used to identify and report MPI errors - real(wp), private, allocatable, dimension(:), target :: buff_send !< + real(wp), private, allocatable, dimension(:) :: buff_send !< !! This variable is utilized to pack and send the buffer of the cell-average !! primitive variables, for a single computational domain boundary at the !! time, to the relevant neighboring processor. - real(wp), private, allocatable, dimension(:), target :: buff_recv !< + real(wp), private, allocatable, dimension(:) :: buff_recv !< !! buff_recv is utilized to receive and unpack the buffer of the cell- !! average primitive variables, for a single computational domain boundary !! at the time, from the relevant neighboring processor. !$acc declare create(buff_send, buff_recv) - integer :: halo_size, nVars - !$acc declare create(halo_size, nVars) + integer :: halo_size + !$acc declare create(halo_size) contains @@ -51,46 +51,34 @@ contains impure subroutine s_initialize_mpi_common_module #ifdef MFC_MPI - ! Allocating buff_send/recv and ib_buff_send/recv. Please note that - ! for the sake of simplicity, both variables are provided sufficient - ! storage to hold the largest buffer in the computational domain. + ! Allocating buff_send/recv and. Please note that for the sake of + ! simplicity, both variables are provided sufficient storage to hold + ! the largest buffer in the computational domain. + if (qbmm .and. .not. 
polytropic) then - if (n > 0) then - if (p > 0) then - halo_size = -1 + buff_size*(sys_size + 2*nb*4)* & - & (m + 2*buff_size + 1)* & - & (n + 2*buff_size + 1)* & - & (p + 2*buff_size + 1)/ & - & (min(m, n, p) + 2*buff_size + 1) - else - halo_size = -1 + buff_size*(sys_size + 2*nb*4)* & - & (max(m, n) + 2*buff_size + 1) - end if - else - halo_size = -1 + buff_size*(sys_size + 2*nb*4) - end if + v_size = sys_size + 2*nb*4 else - if (n > 0) then - if (p > 0) then - halo_size = -1 + buff_size*sys_size* & - & (m + 2*buff_size + 1)* & - & (n + 2*buff_size + 1)* & - & (p + 2*buff_size + 1)/ & - & (min(m, n, p) + 2*buff_size + 1) - else - halo_size = -1 + buff_size*sys_size* & - & (max(m, n) + 2*buff_size + 1) - end if + v_size = sys_size + end if + + if (n > 0) then + if (p > 0) then + halo_size = nint(-1._wp + 1._wp*buff_size*(v_size)* & + & (m + 2*buff_size + 1)* & + & (n + 2*buff_size + 1)* & + & (p + 2*buff_size + 1)/ & + & (min(m, n, p) + 2*buff_size + 1)) else - halo_size = -1 + buff_size*sys_size + halo_size = -1 + buff_size*(v_size)* & + & (max(m, n) + 2*buff_size + 1) end if + else + halo_size = -1 + buff_size*(v_size) end if - v_size = sys_size + !$acc update device(halo_size, v_size) - allocate (buff_send(0:halo_size)) - - allocate (buff_recv(0:ubound(buff_send, 1))) + @:ALLOCATE(buff_send(0:halo_size), buff_recv(0:halo_size)) #endif end subroutine s_initialize_mpi_common_module @@ -135,24 +123,11 @@ contains !! @param beta Eulerian void fraction from lagrangian bubbles impure subroutine s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm, beta) - type(scalar_field), & - dimension(sys_size), & - intent(in) :: q_cons_vf - - type(integer_field), & - optional, & - intent(in) :: ib_markers - - type(levelset_field), & - optional, & - intent(IN) :: levelset - - type(levelset_norm_field), & - optional, & - intent(IN) :: levelset_norm - - type(scalar_field), & - intent(in), optional :: beta + type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf + type(integer_field), optional, intent(in) :: ib_markers + type(levelset_field), optional, intent(IN) :: levelset + type(levelset_norm_field), optional, intent(IN) :: levelset_norm + type(scalar_field), intent(in), optional :: beta integer, dimension(num_dims) :: sizes_glb, sizes_loc integer, dimension(1) :: airfoil_glb, airfoil_loc, airfoil_start @@ -611,15 +586,15 @@ contains !! @param q_cons_vf Cell-average conservative variables !! @param mpi_dir MPI communication coordinate direction !! 
@param pbc_loc Processor boundary condition (PBC) location - subroutine s_mpi_sendrecv_variables_buffers(q_cons_vf, & - pb, mv, & + subroutine s_mpi_sendrecv_variables_buffers(q_comm, & mpi_dir, & - pbc_loc) - - type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + pbc_loc, & + nVar, & + pb, mv) - integer, intent(in) :: mpi_dir, pbc_loc + type(scalar_field), dimension(1:), intent(inout) :: q_comm + real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + integer, intent(in) :: mpi_dir, pbc_loc, nVar integer :: i, j, k, l, r, q !< Generic loop iterators @@ -629,7 +604,7 @@ contains integer :: beg_end(1:2), grid_dims(1:3) integer :: dst_proc, src_proc, recv_tag, send_tag - logical :: beg_end_geq_0 + logical :: beg_end_geq_0, qbmm_comm integer :: pack_offset, unpack_offset @@ -638,25 +613,27 @@ contains #ifdef MFC_MPI call nvtxStartRange("RHS-COMM-PACKBUF") -!$acc update device(v_size) -#ifdef MFC_SIMULATION - if (qbmm .and. .not. polytropic) then + qbmm_comm = .false. + + if (present(pb) .and. present(mv) .and. qbmm .and. .not. polytropic) then + qbmm_comm = .true. + v_size = nVar + 2*nb*4 buffer_counts = (/ & - buff_size*(sys_size + 2*nb*4)*(n + 1)*(p + 1), & - buff_size*(sys_size + 2*nb*4)*(m + 2*buff_size + 1)*(p + 1), & + buff_size*v_size*(n + 1)*(p + 1), & + buff_size*v_size*(m + 2*buff_size + 1)*(p + 1), & buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & /) else -#endif + v_size = nVar buffer_counts = (/ & - buff_size*sys_size*(n + 1)*(p + 1), & - buff_size*sys_size*(m + 2*buff_size + 1)*(p + 1), & + buff_size*v_size*(n + 1)*(p + 1), & + buff_size*v_size*(m + 2*buff_size + 1)*(p + 1), & buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & /) -#ifdef MFC_SIMULATION end if -#endif + + !$acc update device(v_size) buffer_count = buffer_counts(mpi_dir) boundary_conditions = (/bc_x, bc_y, bc_z/) @@ -696,25 +673,24 @@ contains do l = 0, p do k = 0, n do j = 0, buff_size - 1 - do i = 1, sys_size + do i = 1, nVar r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) - buff_send(r) = q_cons_vf(i)%sf(j + pack_offset, k, l) + buff_send(r) = q_comm(i)%sf(j + pack_offset, k, l) end do end do end do end do -#ifdef MFC_SIMULATION - if (qbmm .and. .not. 
polytropic) then + if (qbmm_comm) then !$acc parallel loop collapse(4) gang vector default(present) private(r) do l = 0, p do k = 0, n do j = 0, buff_size - 1 - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do q = 1, nb r = (i - 1) + (q - 1)*4 + v_size* & (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = pb(j + pack_offset, k, l, i - sys_size, q) + buff_send(r) = pb(j + pack_offset, k, l, i - nVar, q) end do end do end do @@ -725,37 +701,35 @@ contains do l = 0, p do k = 0, n do j = 0, buff_size - 1 - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do q = 1, nb r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = mv(j + pack_offset, k, l, i - sys_size, q) + buff_send(r) = mv(j + pack_offset, k, l, i - nVar, q) end do end do end do end do end do end if -#endif #:elif mpi_dir == 2 !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, sys_size + do i = 1, nVar do l = 0, p do k = 0, buff_size - 1 do j = -buff_size, m + buff_size r = (i - 1) + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & (k + buff_size*l)) - buff_send(r) = q_cons_vf(i)%sf(j, k + pack_offset, l) + buff_send(r) = q_comm(i)%sf(j, k + pack_offset, l) end do end do end do end do -#ifdef MFC_SIMULATION - if (qbmm .and. .not. polytropic) then + if (qbmm_comm) then !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = 0, p do k = 0, buff_size - 1 do j = -buff_size, m + buff_size @@ -763,7 +737,7 @@ contains r = (i - 1) + (q - 1)*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & (k + buff_size*l)) - buff_send(r) = pb(j, k + pack_offset, l, i - sys_size, q) + buff_send(r) = pb(j, k + pack_offset, l, i - nVar, q) end do end do end do @@ -771,7 +745,7 @@ contains end do !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = 0, p do k = 0, buff_size - 1 do j = -buff_size, m + buff_size @@ -779,33 +753,31 @@ contains r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & (k + buff_size*l)) - buff_send(r) = mv(j, k + pack_offset, l, i - sys_size, q) + buff_send(r) = mv(j, k + pack_offset, l, i - nVar, q) end do end do end do end do end do end if -#endif #:else !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, sys_size + do i = 1, nVar do l = 0, buff_size - 1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size r = (i - 1) + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = q_cons_vf(i)%sf(j, k, l + pack_offset) + buff_send(r) = q_comm(i)%sf(j, k, l + pack_offset) end do end do end do end do -#ifdef MFC_SIMULATION - if (qbmm .and. .not. 
polytropic) then + if (qbmm_comm) then !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size @@ -813,7 +785,7 @@ contains r = (i - 1) + (q - 1)*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = pb(j, k, l + pack_offset, i - sys_size, q) + buff_send(r) = pb(j, k, l + pack_offset, i - nVar, q) end do end do end do @@ -821,7 +793,7 @@ contains end do !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size @@ -829,29 +801,24 @@ contains r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = mv(j, k, l + pack_offset, i - sys_size, q) + buff_send(r) = mv(j, k, l + pack_offset, i - nVar, q) end do end do end do end do end do end if -#endif #:endif end if #:endfor call nvtxEndRange ! Packbuf - p_send => buff_send(0) - p_recv => buff_recv(0) - ! Send/Recv #ifdef MFC_SIMULATION #:for rdma_mpi in [False, True] if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then #:if rdma_mpi - !$acc data attach(p_send, p_recv) - !$acc host_data use_device(p_send, p_recv) + !$acc host_data use_device(buff_send, buff_recv) call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") #:else call nvtxStartRange("RHS-COMM-DEV2HOST") @@ -861,15 +828,14 @@ contains #:endif call MPI_SENDRECV( & - p_send, buffer_count, mpi_p, dst_proc, send_tag, & - p_recv, buffer_count, mpi_p, src_proc, recv_tag, & + buff_send, buffer_count, mpi_p, dst_proc, send_tag, & + buff_recv, buffer_count, mpi_p, src_proc, recv_tag, & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA #:if rdma_mpi !$acc end host_data - !$acc end data !$acc wait #:else call nvtxStartRange("RHS-COMM-HOST2DEV") @@ -880,8 +846,8 @@ contains #:endfor #else call MPI_SENDRECV( & - p_send, buffer_count, mpi_p, dst_proc, send_tag, & - p_recv, buffer_count, mpi_p, src_proc, recv_tag, & + buff_send, buffer_count, mpi_p, dst_proc, send_tag, & + buff_recv, buffer_count, mpi_p, src_proc, recv_tag, & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) #endif @@ -894,12 +860,12 @@ contains do l = 0, p do k = 0, n do j = -buff_size, -1 - do i = 1, sys_size + do i = 1, nVar r = (i - 1) + v_size* & (j + buff_size*((k + 1) + (n + 1)*l)) - q_cons_vf(i)%sf(j + unpack_offset, k, l) = buff_recv(r) + q_comm(i)%sf(j + unpack_offset, k, l) = buff_recv(r) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then print *, "Error", j, k, l, i error stop "NaN(s) in recv" end if @@ -909,17 +875,16 @@ contains end do end do -#ifdef MFC_SIMULATION - if (qbmm .and. .not. 
polytropic) then + if (qbmm_comm) then !$acc parallel loop collapse(5) gang vector default(present) private(r) do l = 0, p do k = 0, n do j = -buff_size, -1 - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do q = 1, nb r = (i - 1) + (q - 1)*4 + v_size* & (j + buff_size*((k + 1) + (n + 1)*l)) - pb(j + unpack_offset, k, l, i - sys_size, q) = buff_recv(r) + pb(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) end do end do end do @@ -930,30 +895,29 @@ contains do l = 0, p do k = 0, n do j = -buff_size, -1 - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do q = 1, nb r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & (j + buff_size*((k + 1) + (n + 1)*l)) - mv(j + unpack_offset, k, l, i - sys_size, q) = buff_recv(r) + mv(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) end do end do end do end do end do end if -#endif #:elif mpi_dir == 2 !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, sys_size + do i = 1, nVar do l = 0, p do k = -buff_size, -1 do j = -buff_size, m + buff_size r = (i - 1) + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + buff_size*l)) - q_cons_vf(i)%sf(j, k + unpack_offset, l) = buff_recv(r) + q_comm(i)%sf(j, k + unpack_offset, l) = buff_recv(r) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then print *, "Error", j, k, l, i error stop "NaN(s) in recv" end if @@ -963,10 +927,9 @@ contains end do end do -#ifdef MFC_SIMULATION - if (qbmm .and. .not. polytropic) then + if (qbmm_comm) then !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = 0, p do k = -buff_size, -1 do j = -buff_size, m + buff_size @@ -974,7 +937,7 @@ contains r = (i - 1) + (q - 1)*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + buff_size*l)) - pb(j, k + unpack_offset, l, i - sys_size, q) = buff_recv(r) + pb(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) end do end do end do @@ -982,7 +945,7 @@ contains end do !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = 0, p do k = -buff_size, -1 do j = -buff_size, m + buff_size @@ -990,18 +953,17 @@ contains r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + buff_size*l)) - mv(j, k + unpack_offset, l, i - sys_size, q) = buff_recv(r) + mv(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) end do end do end do end do end do end if -#endif #:else ! Unpacking buffer from bc_z%beg !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, sys_size + do i = 1, nVar do l = -buff_size, -1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size @@ -1009,9 +971,9 @@ contains ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)* & (l + buff_size))) - q_cons_vf(i)%sf(j, k, l + unpack_offset) = buff_recv(r) + q_comm(i)%sf(j, k, l + unpack_offset) = buff_recv(r) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then print *, "Error", j, k, l, i error stop "NaN(s) in recv" end if @@ -1021,10 +983,9 @@ contains end do end do -#ifdef MFC_SIMULATION - if (qbmm .and. .not. 
polytropic) then + if (qbmm_comm) then !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = -buff_size, -1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size @@ -1033,7 +994,7 @@ contains ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)* & (l + buff_size))) - pb(j, k, l + unpack_offset, i - sys_size, q) = buff_recv(r) + pb(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) end do end do end do @@ -1041,7 +1002,7 @@ contains end do !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = -buff_size, -1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size @@ -1050,246 +1011,682 @@ contains ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)* & (l + buff_size))) - mv(j, k, l + unpack_offset, i - sys_size, q) = buff_recv(r) + mv(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) end do end do end do end do end do end if -#endif #:endif end if #:endfor call nvtxEndRange - #endif end subroutine s_mpi_sendrecv_variables_buffers - subroutine s_mpi_sendrecv_capilary_variables_buffers(c_divs_vf, mpi_dir, pbc_loc) + !> The purpose of this procedure is to optimally decompose + !! the computational domain among the available processors. + !! This is performed by attempting to award each processor, + !! in each of the coordinate directions, approximately the + !! same number of cells, and then recomputing the affected + !! global parameters. + subroutine s_mpi_decompose_computational_domain - type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs_vf - integer, intent(in) :: mpi_dir, pbc_loc +#ifdef MFC_MPI - integer :: i, j, k, l, r !< Generic loop iterators + integer :: num_procs_x, num_procs_y, num_procs_z !< + !! Optimal number of processors in the x-, y- and z-directions - integer :: buffer_counts(1:3), buffer_count + real(wp) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z !< + !! Non-optimal number of processors in the x-, y- and z-directions - type(int_bounds_info) :: boundary_conditions(1:3) - integer :: beg_end(1:2), grid_dims(1:3) - integer :: dst_proc, src_proc, recv_tag, send_tag + real(wp) :: fct_min !< + !! Processor factorization (fct) minimization parameter - logical :: beg_end_geq_0 + integer :: MPI_COMM_CART !< + !! Cartesian processor topology communicator - integer :: pack_offset, unpack_offset - real(wp), pointer :: p_send, p_recv + integer :: rem_cells !< + !! Remaining number of cells, in a particular coordinate direction, + !! after the majority is divided up among the available processors -#ifdef MFC_MPI + integer :: i, j !< Generic loop iterators - nVars = num_dims + 1 - !$acc update device(nVars) + if (num_procs == 1 .and. parallel_io) then + do i = 1, num_dims + start_idx(i) = 0 + end do + return + end if - buffer_counts = (/ & - buff_size*nVars*(n + 1)*(p + 1), & - buff_size*nVars*(m + 2*buff_size + 1)*(p + 1), & - buff_size*nVars*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & - /) + ! 3D Cartesian Processor Topology + if (n > 0) then - buffer_count = buffer_counts(mpi_dir) - boundary_conditions = (/bc_x, bc_y, bc_z/) - beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) - beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 + if (p > 0) then - ! Implements: - ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc - ! 
-1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] - ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] - ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] - ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] + if (cyl_coord .and. p > 0) then + ! Implement pencil processor blocking if using cylindrical coordinates so + ! that all cells in azimuthal direction are stored on a single processor. + ! This is necessary for efficient application of Fourier filter near axis. - send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) - recv_tag = f_logical_to_int(pbc_loc == 1) + ! Initial values of the processor factorization optimization + num_procs_x = 1 + num_procs_y = num_procs + num_procs_z = 1 + ierr = -1 - dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) - src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) + ! Computing minimization variable for these initial values + tmp_num_procs_x = num_procs_x + tmp_num_procs_y = num_procs_y + tmp_num_procs_z = num_procs_z + fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) - grid_dims = (/m, n, p/) + ! Searching for optimal computational domain distribution + do i = 1, num_procs - pack_offset = 0 - if (f_xor(pbc_loc == 1, beg_end_geq_0)) then - pack_offset = grid_dims(mpi_dir) - buff_size + 1 - end if + if (mod(num_procs, i) == 0 & + .and. & + (m + 1)/i >= num_stcls_min*weno_order) then - unpack_offset = 0 - if (pbc_loc == 1) then - unpack_offset = grid_dims(mpi_dir) + buff_size + 1 - end if + tmp_num_procs_x = i + tmp_num_procs_y = num_procs/i - ! Pack Buffer to Send - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, nVars - r = (i - 1) + nVars*(j + buff_size*(k + (n + 1)*l)) - buff_send(r) = c_divs_vf(i)%sf(j + pack_offset, k, l) - end do - end do - end do - end do + if (fct_min >= abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + .and. & + (n + 1)/tmp_num_procs_y & + >= & + num_stcls_min*weno_order) then + + num_procs_x = i + num_procs_y = num_procs/i + fct_min = abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + ierr = 0 + + end if + + end if - #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = c_divs_vf(i)%sf(j, k + pack_offset, l) - end do - end do - end do end do - #:else - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = c_divs_vf(i)%sf(j, k, l + pack_offset) - end do + else + + ! Initial estimate of optimal processor topology + num_procs_x = 1 + num_procs_y = 1 + num_procs_z = num_procs + ierr = -1 + + ! Benchmarking the quality of this initial guess + tmp_num_procs_x = num_procs_x + tmp_num_procs_y = num_procs_y + tmp_num_procs_z = num_procs_z + fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + + 10._wp*abs((n + 1)/tmp_num_procs_y & + - (p + 1)/tmp_num_procs_z) + + ! 
Optimization of the initial processor topology + do i = 1, num_procs + + if (mod(num_procs, i) == 0 & + .and. & + (m + 1)/i >= num_stcls_min*weno_order) then + + do j = 1, num_procs/i + + if (mod(num_procs/i, j) == 0 & + .and. & + (n + 1)/j >= num_stcls_min*weno_order) then + + tmp_num_procs_x = i + tmp_num_procs_y = j + tmp_num_procs_z = num_procs/(i*j) + + if (fct_min >= abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + + abs((n + 1)/tmp_num_procs_y & + - (p + 1)/tmp_num_procs_z) & + .and. & + (p + 1)/tmp_num_procs_z & + >= & + num_stcls_min*weno_order) & + then + + num_procs_x = i + num_procs_y = j + num_procs_z = num_procs/(i*j) + fct_min = abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + + abs((n + 1)/tmp_num_procs_y & + - (p + 1)/tmp_num_procs_z) + ierr = 0 + + end if + + end if + end do - end do + + end if + end do - #:endif + + end if + + ! Verifying that a valid decomposition of the computational + ! domain has been established. If not, the simulation exits. + if (proc_rank == 0 .and. ierr == -1) then + call s_mpi_abort('Unsupported combination of values '// & + 'of num_procs, m, n, p and '// & + 'weno_order. Exiting.') + end if + + ! Creating new communicator using the Cartesian topology + call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, & + num_procs_y, num_procs_z/), & + (/.true., .true., .true./), & + .false., MPI_COMM_CART, ierr) + + ! Finding the Cartesian coordinates of the local process + call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, & + proc_coords, ierr) + ! END: 3D Cartesian Processor Topology + + ! Global Parameters for z-direction + + ! Number of remaining cells + rem_cells = mod(p + 1, num_procs_z) + + ! Optimal number of cells per processor + p = (p + 1)/num_procs_z - 1 + + ! Distributing the remaining cells + do i = 1, rem_cells + if (proc_coords(3) == i - 1) then + p = p + 1; exit + end if + end do + + ! Boundary condition at the beginning + if (proc_coords(3) > 0 .or. (bc_z%beg == BC_PERIODIC .and. num_procs_z > 1)) then + proc_coords(3) = proc_coords(3) - 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & + bc_z%beg, ierr) + proc_coords(3) = proc_coords(3) + 1 + end if + + ! Boundary condition at the end + if (proc_coords(3) < num_procs_z - 1 .or. (bc_z%end == BC_PERIODIC .and. num_procs_z > 1)) then + proc_coords(3) = proc_coords(3) + 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & + bc_z%end, ierr) + proc_coords(3) = proc_coords(3) - 1 + end if + +#ifdef MFC_POST_PROCESS + ! Ghost zone at the beginning + if (proc_coords(3) > 0 .and. format == 1) then + offset_z%beg = 2 + else + offset_z%beg = 0 + end if + + ! Ghost zone at the end + if (proc_coords(3) < num_procs_z - 1 .and. format == 1) then + offset_z%end = 2 + else + offset_z%end = 0 + end if +#endif + + ! Beginning and end sub-domain boundary locations + if (parallel_io) then + if (proc_coords(3) < rem_cells) then + start_idx(3) = (p + 1)*proc_coords(3) + else + start_idx(3) = (p + 1)*proc_coords(3) + rem_cells + end if + else +#ifdef MFC_PRE_PROCESS + if (old_grid .neqv. .true.) 
then + dz = (z_domain%end - z_domain%beg)/real(p_glb + 1, wp) + + if (proc_coords(3) < rem_cells) then + z_domain%beg = z_domain%beg + dz*real((p + 1)* & + proc_coords(3)) + z_domain%end = z_domain%end - dz*real((p + 1)* & + (num_procs_z - proc_coords(3) - 1) & + - (num_procs_z - rem_cells)) + else + z_domain%beg = z_domain%beg + dz*real((p + 1)* & + proc_coords(3) + rem_cells) + z_domain%end = z_domain%end - dz*real((p + 1)* & + (num_procs_z - proc_coords(3) - 1)) + end if + end if +#endif + end if + + ! 2D Cartesian Processor Topology + else + + ! Initial estimate of optimal processor topology + num_procs_x = 1 + num_procs_y = num_procs + ierr = -1 + + ! Benchmarking the quality of this initial guess + tmp_num_procs_x = num_procs_x + tmp_num_procs_y = num_procs_y + fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + + ! Optimization of the initial processor topology + do i = 1, num_procs + + if (mod(num_procs, i) == 0 & + .and. & + (m + 1)/i >= num_stcls_min*weno_order) then + + tmp_num_procs_x = i + tmp_num_procs_y = num_procs/i + + if (fct_min >= abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + .and. & + (n + 1)/tmp_num_procs_y & + >= & + num_stcls_min*weno_order) then + + num_procs_x = i + num_procs_y = num_procs/i + fct_min = abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + ierr = 0 + + end if + + end if + + end do + + ! Verifying that a valid decomposition of the computational + ! domain has been established. If not, the simulation exits. + if (proc_rank == 0 .and. ierr == -1) then + call s_mpi_abort('Unsupported combination of values '// & + 'of num_procs, m, n and '// & + 'weno_order. Exiting.') + end if + + ! Creating new communicator using the Cartesian topology + call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, & + num_procs_y/), (/.true., & + .true./), .false., MPI_COMM_CART, & + ierr) + + ! Finding the Cartesian coordinates of the local process + call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, & + proc_coords, ierr) + end if - #:endfor - p_send => buff_send(0) - p_recv => buff_recv(0) - ! Send/Recv -#ifdef MFC_SIMULATION - #:for rdma_mpi in [False, True] - if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then - #:if rdma_mpi - !$acc data attach(p_send, p_recv) - !$acc host_data use_device(p_send, p_recv) - call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") - #:else - call nvtxStartRange("RHS-COMM-DEV2HOST") - !$acc update host(buff_send) - call nvtxEndRange - call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA") - #:endif + ! END: 2D Cartesian Processor Topology - call MPI_SENDRECV( & - p_send, buffer_count, mpi_p, dst_proc, send_tag, & - p_recv, buffer_count, mpi_p, src_proc, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + ! Global Parameters for y-direction - call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA + ! Number of remaining cells + rem_cells = mod(n + 1, num_procs_y) - #:if rdma_mpi - !$acc end host_data - !$acc end data - !$acc wait - #:else - call nvtxStartRange("RHS-COMM-HOST2DEV") - !$acc update device(buff_recv) - call nvtxEndRange - #:endif + ! Optimal number of cells per processor + n = (n + 1)/num_procs_y - 1 + + ! Distributing the remaining cells + do i = 1, rem_cells + if (proc_coords(2) == i - 1) then + n = n + 1; exit + end if + end do + + ! Boundary condition at the beginning + if (proc_coords(2) > 0 .or. (bc_y%beg == BC_PERIODIC .and. 
num_procs_y > 1)) then + proc_coords(2) = proc_coords(2) - 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & + bc_y%beg, ierr) + proc_coords(2) = proc_coords(2) + 1 + end if + + ! Boundary condition at the end + if (proc_coords(2) < num_procs_y - 1 .or. (bc_y%end == BC_PERIODIC .and. num_procs_y > 1)) then + proc_coords(2) = proc_coords(2) + 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & + bc_y%end, ierr) + proc_coords(2) = proc_coords(2) - 1 + end if + +#ifdef MFC_POST_PROCESS + ! Ghost zone at the beginning + if (proc_coords(2) > 0 .and. format == 1) then + offset_y%beg = 2 + else + offset_y%beg = 0 + end if + + ! Ghost zone at the end + if (proc_coords(2) < num_procs_y - 1 .and. format == 1) then + offset_y%end = 2 + else + offset_y%end = 0 end if - #:endfor -#else - call MPI_SENDRECV( & - p_send, buffer_count, mpi_p, dst_proc, send_tag, & - p_recv, buffer_count, mpi_p, src_proc, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) #endif - ! Unpack Received Buffer - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, nVars - r = (i - 1) + nVars* & - (j + buff_size*((k + 1) + (n + 1)*l)) - c_divs_vf(i)%sf(j + unpack_offset, k, l) = buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + ! Beginning and end sub-domain boundary locations + if (parallel_io) then + if (proc_coords(2) < rem_cells) then + start_idx(2) = (n + 1)*proc_coords(2) + else + start_idx(2) = (n + 1)*proc_coords(2) + rem_cells + end if + else +#ifdef MFC_PRE_PROCESS + if (old_grid .neqv. .true.) then + dy = (y_domain%end - y_domain%beg)/real(n_glb + 1, wp) + + if (proc_coords(2) < rem_cells) then + y_domain%beg = y_domain%beg + dy*real((n + 1)* & + proc_coords(2)) + y_domain%end = y_domain%end - dy*real((n + 1)* & + (num_procs_y - proc_coords(2) - 1) & + - (num_procs_y - rem_cells)) + else + y_domain%beg = y_domain%beg + dy*real((n + 1)* & + proc_coords(2) + rem_cells) + y_domain%end = y_domain%end - dy*real((n + 1)* & + (num_procs_y - proc_coords(2) - 1)) + end if + end if #endif - end do - end do - end do - end do + end if - #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - c_divs_vf(i)%sf(j, k + unpack_offset, l) = buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + ! 1D Cartesian Processor Topology + else + + ! Optimal processor topology + num_procs_x = num_procs + + ! Creating new communicator using the Cartesian topology + call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), & + (/.true./), .false., MPI_COMM_CART, & + ierr) + + ! Finding the Cartesian coordinates of the local process + call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, & + proc_coords, ierr) + + end if + + ! Global Parameters for x-direction + + ! Number of remaining cells + rem_cells = mod(m + 1, num_procs_x) + + ! Optimal number of cells per processor + m = (m + 1)/num_procs_x - 1 + + ! 
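    ! Illustrative sketch (hypothetical sizes, not taken from this patch): the
    ! remainder-cell bookkeeping used in every direction of this routine splits
    ! n_glb+1 cells over the ranks of that direction, hands one extra cell to
    ! the first rem ranks, and recovers each rank's global starting cell index
    ! exactly as the start_idx branches below do.
    program cell_split_sketch
        implicit none
        integer :: n_glb, nproc, rem, rank, n_loc, start_idx

        n_glb = 200          ! hypothetical last global cell index (n_glb+1 cells)
        nproc = 7            ! hypothetical number of ranks in this direction

        rem = mod(n_glb + 1, nproc)

        do rank = 0, nproc - 1
            n_loc = (n_glb + 1)/nproc - 1        ! base local last index per rank
            if (rank < rem) n_loc = n_loc + 1    ! first `rem` ranks take one extra cell

            if (rank < rem) then
                start_idx = (n_loc + 1)*rank
            else
                start_idx = (n_loc + 1)*rank + rem
            end if

            print '(A,I3,A,I5,A,I5)', 'rank ', rank, ': cells = ', n_loc + 1, &
                ', first global cell = ', start_idx
        end do
    end program cell_split_sketch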
Distributing the remaining cells + do i = 1, rem_cells + if (proc_coords(1) == i - 1) then + m = m + 1; exit + end if + end do + + ! Boundary condition at the beginning + if (proc_coords(1) > 0 .or. (bc_x%beg == BC_PERIODIC .and. num_procs_x > 1)) then + proc_coords(1) = proc_coords(1) - 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) + proc_coords(1) = proc_coords(1) + 1 + end if + + ! Boundary condition at the end + if (proc_coords(1) < num_procs_x - 1 .or. (bc_x%end == BC_PERIODIC .and. num_procs_x > 1)) then + proc_coords(1) = proc_coords(1) + 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) + proc_coords(1) = proc_coords(1) - 1 + end if + +#ifdef MFC_POST_PROCESS + ! Ghost zone at the beginning + if (proc_coords(1) > 0 .and. format == 1 .and. n > 0) then + offset_x%beg = 2 + else + offset_x%beg = 0 + end if + + ! Ghost zone at the end + if (proc_coords(1) < num_procs_x - 1 .and. format == 1 .and. n > 0) then + offset_x%end = 2 + else + offset_x%end = 0 + end if #endif - end do - end do - end do - end do - #:else - ! Unpacking buffer from bc_z%beg - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - c_divs_vf(i)%sf(j, k, l + unpack_offset) = buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + ! Beginning and end sub-domain boundary locations + if (parallel_io) then + if (proc_coords(1) < rem_cells) then + start_idx(1) = (m + 1)*proc_coords(1) + else + start_idx(1) = (m + 1)*proc_coords(1) + rem_cells + end if + else +#ifdef MFC_PRE_PROCESS + if (old_grid .neqv. .true.) then + dx = (x_domain%end - x_domain%beg)/real(m_glb + 1, wp) + + if (proc_coords(1) < rem_cells) then + x_domain%beg = x_domain%beg + dx*real((m + 1)* & + proc_coords(1)) + x_domain%end = x_domain%end - dx*real((m + 1)* & + (num_procs_x - proc_coords(1) - 1) & + - (num_procs_x - rem_cells)) + else + x_domain%beg = x_domain%beg + dx*real((m + 1)* & + proc_coords(1) + rem_cells) + x_domain%end = x_domain%end - dx*real((m + 1)* & + (num_procs_x - proc_coords(1) - 1)) + end if + end if +#endif + end if #endif - end do - end do - end do - end do - #:endif + end subroutine s_mpi_decompose_computational_domain + + !> The goal of this procedure is to populate the buffers of + !! the grid variables by communicating with the neighboring + !! processors. Note that only the buffers of the cell-width + !! distributions are handled in such a way. This is because + !! the buffers of cell-boundary locations may be calculated + !! directly from those of the cell-width distributions. + !! @param mpi_dir MPI communication coordinate direction + !! @param pbc_loc Processor boundary condition (PBC) location +#ifndef MFC_PRE_PROCESS + subroutine s_mpi_sendrecv_grid_variables_buffers(mpi_dir, pbc_loc) + + integer, intent(in) :: mpi_dir + integer, intent(in) :: pbc_loc + +#ifdef MFC_MPI + + ! MPI Communication in x-direction + if (mpi_dir == 1) then + + if (pbc_loc == -1) then ! PBC at the beginning + + if (bc_x%end >= 0) then ! PBC at the beginning and end + + ! 
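    ! Illustrative sketch (hypothetical sizes; MPI_DOUBLE_PRECISION stands in
    ! for the code's working-precision type mpi_p): the SENDRECV calls below
    ! exchange buff_size cell widths with the neighbour rank stored in
    ! bc_x%beg / bc_x%end. A minimal 1-D analogue on a periodic Cartesian
    ! communicator, using the same tag convention (0 toward beg, 1 toward end):
    program halo_exchange_sketch
        use mpi
        implicit none
        integer, parameter :: nloc = 8, buf = 3
        double precision :: dx(-buf:nloc - 1 + buf)
        integer :: comm_cart, left, right, rank, nproc, ierr

        call MPI_INIT(ierr)
        call MPI_COMM_SIZE(MPI_COMM_WORLD, nproc, ierr)

        ! Periodic 1-D Cartesian topology; MPI_CART_SHIFT returns the neighbour
        ! ranks (MPI_PROC_NULL if a side has no neighbour).
        call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/nproc/), (/.true./), &
                             .false., comm_cart, ierr)
        call MPI_COMM_RANK(comm_cart, rank, ierr)
        call MPI_CART_SHIFT(comm_cart, 0, 1, left, right, ierr)

        dx = real(rank, kind(dx))  ! dummy interior data

        ! Last `buf` interior widths go to the right neighbour; the left ghost
        ! widths are filled, like the bc_x%end/bc_x%beg exchange below.
        call MPI_SENDRECV(dx(nloc - buf), buf, MPI_DOUBLE_PRECISION, right, 0, &
                          dx(-buf), buf, MPI_DOUBLE_PRECISION, left, 0, &
                          comm_cart, MPI_STATUS_IGNORE, ierr)

        ! Opposite sweep: first `buf` interior widths go left, right ghosts filled.
        call MPI_SENDRECV(dx(0), buf, MPI_DOUBLE_PRECISION, left, 1, &
                          dx(nloc), buf, MPI_DOUBLE_PRECISION, right, 1, &
                          comm_cart, MPI_STATUS_IGNORE, ierr)

        call MPI_FINALIZE(ierr)
    end program halo_exchange_sketch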
Send/receive buffer to/from bc_x%end/bc_x%beg + call MPI_SENDRECV( & + dx(m - buff_size + 1), buff_size, & + mpi_p, bc_x%end, 0, & + dx(-buff_size), buff_size, & + mpi_p, bc_x%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the beginning only + + ! Send/receive buffer to/from bc_x%beg/bc_x%beg + call MPI_SENDRECV( & + dx(0), buff_size, & + mpi_p, bc_x%beg, 1, & + dx(-buff_size), buff_size, & + mpi_p, bc_x%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + else ! PBC at the end + + if (bc_x%beg >= 0) then ! PBC at the end and beginning + + ! Send/receive buffer to/from bc_x%beg/bc_x%end + call MPI_SENDRECV( & + dx(0), buff_size, & + mpi_p, bc_x%beg, 1, & + dx(m + 1), buff_size, & + mpi_p, bc_x%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the end only + + ! Send/receive buffer to/from bc_x%end/bc_x%end + call MPI_SENDRECV( & + dx(m - buff_size + 1), buff_size, & + mpi_p, bc_x%end, 0, & + dx(m + 1), buff_size, & + mpi_p, bc_x%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + end if - #:endfor + ! END: MPI Communication in x-direction + + ! MPI Communication in y-direction + elseif (mpi_dir == 2) then + + if (pbc_loc == -1) then ! PBC at the beginning + + if (bc_y%end >= 0) then ! PBC at the beginning and end + + ! Send/receive buffer to/from bc_y%end/bc_y%beg + call MPI_SENDRECV( & + dy(n - buff_size + 1), buff_size, & + mpi_p, bc_y%end, 0, & + dy(-buff_size), buff_size, & + mpi_p, bc_y%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the beginning only + + ! Send/receive buffer to/from bc_y%beg/bc_y%beg + call MPI_SENDRECV( & + dy(0), buff_size, & + mpi_p, bc_y%beg, 1, & + dy(-buff_size), buff_size, & + mpi_p, bc_y%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + else ! PBC at the end + + if (bc_y%beg >= 0) then ! PBC at the end and beginning + + ! Send/receive buffer to/from bc_y%beg/bc_y%end + call MPI_SENDRECV( & + dy(0), buff_size, & + mpi_p, bc_y%beg, 1, & + dy(n + 1), buff_size, & + mpi_p, bc_y%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the end only + + ! Send/receive buffer to/from bc_y%end/bc_y%end + call MPI_SENDRECV( & + dy(n - buff_size + 1), buff_size, & + mpi_p, bc_y%end, 0, & + dy(n + 1), buff_size, & + mpi_p, bc_y%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + end if + ! END: MPI Communication in y-direction + + ! MPI Communication in z-direction + else + + if (pbc_loc == -1) then ! PBC at the beginning + + if (bc_z%end >= 0) then ! PBC at the beginning and end + + ! Send/receive buffer to/from bc_z%end/bc_z%beg + call MPI_SENDRECV( & + dz(p - buff_size + 1), buff_size, & + mpi_p, bc_z%end, 0, & + dz(-buff_size), buff_size, & + mpi_p, bc_z%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the beginning only + + ! Send/receive buffer to/from bc_z%beg/bc_z%beg + call MPI_SENDRECV( & + dz(0), buff_size, & + mpi_p, bc_z%beg, 1, & + dz(-buff_size), buff_size, & + mpi_p, bc_z%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + else ! PBC at the end + + if (bc_z%beg >= 0) then ! PBC at the end and beginning + + ! Send/receive buffer to/from bc_z%beg/bc_z%end + call MPI_SENDRECV( & + dz(0), buff_size, & + mpi_p, bc_z%beg, 1, & + dz(p + 1), buff_size, & + mpi_p, bc_z%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the end only + + ! 
Send/receive buffer to/from bc_z%end/bc_z%end + call MPI_SENDRECV( & + dz(p - buff_size + 1), buff_size, & + mpi_p, bc_z%end, 0, & + dz(p + 1), buff_size, & + mpi_p, bc_z%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + end if + + end if + ! END: MPI Communication in z-direction #endif - end subroutine s_mpi_sendrecv_capilary_variables_buffers + end subroutine s_mpi_sendrecv_grid_variables_buffers +#endif !> Module deallocation and/or disassociation procedures impure subroutine s_finalize_mpi_common_module diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 6b506b77cd..2d3eb6a3a5 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -1138,6 +1138,10 @@ contains if (cont_damage) qK_prim_vf(damage_idx)%sf(j, k, l) = qK_cons_vf(damage_idx)%sf(j, k, l) +#ifdef MFC_POST_PROCESS + if (bubbles_lagrange) qK_prim_vf(beta_idx)%sf(j, k, l) = qK_cons_vf(beta_idx)%sf(j, k, l) +#endif + end do end do end do diff --git a/src/post_process/m_data_input.f90 b/src/post_process/m_data_input.f90 index 45a2baa728..4695e27052 100644 --- a/src/post_process/m_data_input.f90 +++ b/src/post_process/m_data_input.f90 @@ -17,8 +17,12 @@ module m_data_input use m_mpi_proxy !< Message passing interface (MPI) module proxy + use m_mpi_common + use m_compile_specific + use m_boundary_common + use m_helper implicit none @@ -27,8 +31,6 @@ module m_data_input s_read_data_files, & s_read_serial_data_files, & s_read_parallel_data_files, & - s_populate_grid_variables_buffer_regions, & - s_populate_conservative_variables_buffer_regions, & s_finalize_data_input_module abstract interface @@ -51,8 +53,8 @@ end subroutine s_read_abstract_data_files type(scalar_field), allocatable, dimension(:), public :: q_prim_vf !< !! Primitive variables - type(scalar_field), allocatable, dimension(:), public :: q_particle !< - !! Lagrangian solver (particle void fraction) + type(integer_field), allocatable, dimension(:, :), public :: bc_type !< + !! Boundary condition identifiers type(scalar_field), public :: q_T_sf !< !! Temperature field @@ -125,6 +127,11 @@ impure subroutine s_read_serial_data_files(t_step) ' is missing. Exiting.') end if + if (bc_io) then + call s_read_serial_boundary_condition_files(t_step_dir, bc_type) + else + call s_assign_default_bc_type(bc_type) + end if ! Reading the Grid Data File for the x-direction ! Checking whether x_cb.dat exists @@ -238,29 +245,6 @@ impure subroutine s_read_serial_data_files(t_step) end if end if - if (bubbles_lagrange) then !Lagrangian solver - - ! Checking whether the data file associated with the variable - ! position of currently manipulated conservative variable exists - write (file_num, '(I0)') sys_size + 1 - file_loc = trim(t_step_dir)//'/q_cons_vf'// & - trim(file_num)//'.dat' - inquire (FILE=trim(file_loc), EXIST=file_check) - - ! Reading the data file if it exists, exiting otherwise - if (file_check) then - open (1, FILE=trim(file_loc), FORM='unformatted', & - STATUS='old', ACTION='read') - read (1) q_particle(1)%sf(0:m, 0:n, 0:p) - close (1) - else - print '(A)', 'File q_cons_vf'//trim(file_num)// & - '.dat is missing in '//trim(t_step_dir)// & - '. Exiting.' 
- call s_mpi_abort() - end if - end if - end subroutine s_read_serial_data_files !> This subroutine is called at each time-step that has to @@ -292,14 +276,6 @@ impure subroutine s_read_parallel_data_files(t_step) integer :: i - integer :: alt_sys !Altered sys_size for lagrangian solver - - if (bubbles_lagrange) then - alt_sys = sys_size + 1 - else - alt_sys = sys_size - end if - allocate (x_cb_glb(-1:m_glb)) allocate (y_cb_glb(-1:n_glb)) allocate (z_cb_glb(-1:p_glb)) @@ -455,8 +431,6 @@ impure subroutine s_read_parallel_data_files(t_step) ! Initialize MPI data I/O if (ib) then call s_initialize_mpi_data(q_cons_vf, ib_markers) - elseif (bubbles_lagrange) then - call s_initialize_mpi_data(q_cons_vf, beta=q_particle(1)) else call s_initialize_mpi_data(q_cons_vf) end if @@ -471,46 +445,20 @@ impure subroutine s_read_parallel_data_files(t_step) WP_MOK = int(8._wp, MPI_OFFSET_KIND) MOK = int(1._wp, MPI_OFFSET_KIND) str_MOK = int(name_len, MPI_OFFSET_KIND) - NVARS_MOK = int(alt_sys, MPI_OFFSET_KIND) + NVARS_MOK = int(sys_size, MPI_OFFSET_KIND) ! Read the data for each variable - if (bubbles_euler .or. elasticity) then - do i = 1, sys_size - var_MOK = int(i, MPI_OFFSET_KIND) - - ! Initial displacement to skip at beginning of file - disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1) - - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(i), & - 'native', mpi_info_int, ierr) - call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA%var(i)%sf, data_size, & - mpi_p, status, ierr) - end do - else - do i = 1, sys_size - var_MOK = int(i, MPI_OFFSET_KIND) - - ! Initial displacement to skip at beginning of file - disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1) - - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(i), & - 'native', mpi_info_int, ierr) - call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA%var(i)%sf, data_size, & - mpi_p, status, ierr) - end do - end if - - if (bubbles_lagrange) then !Lagrangian solver - var_MOK = int(sys_size + 1, MPI_OFFSET_KIND) + do i = 1, sys_size + var_MOK = int(i, MPI_OFFSET_KIND) ! Initial displacement to skip at beginning of file disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1) - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(sys_size + 1), & + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(i), & 'native', mpi_info_int, ierr) - call MPI_FILE_READ(ifile, MPI_IO_DATA%var(sys_size + 1)%sf, data_size, & - mpi_p, status, ierr) - end if + call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA%var(i)%sf, data_size, & + mpi_p, status, ierr) + end do call s_mpi_barrier() @@ -544,763 +492,15 @@ impure subroutine s_read_parallel_data_files(t_step) deallocate (x_cb_glb, y_cb_glb, z_cb_glb) -#endif - - end subroutine s_read_parallel_data_files - - !> The following subroutine populates the buffer regions of - !! the cell-width spacings, the cell-boundary locations and - !! the cell-center locations. Note that the buffer regions - !! of the last two variables should be interpreted slightly - !! differently than usual. They are really ghost zones that - !! are used in aiding the multidimensional visualization of - !! Silo database files, in VisIt, when processor boundary - !! conditions are present. - impure subroutine s_populate_grid_variables_buffer_regions - - integer :: i !< Generic loop iterator - - ! Populating Buffer Regions in the x-direction - - ! Ghost-cell extrapolation BC at the beginning - if (bc_x%beg <= BC_GHOST_EXTRAP) then - - do i = 1, buff_size - dx(-i) = dx(0) - end do - - ! 
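    ! Illustrative sketch (hypothetical sizes and BC selector): the block being
    ! removed here fills the cell-width ghost cells from the interior according
    ! to the face's boundary-condition type. The three local fill rules it uses
    ! are, in condensed form:
    program dx_ghost_fill_sketch
        implicit none
        integer, parameter :: m = 9, buf = 3
        integer, parameter :: EXTRAP = 0, REFLECT = 1, PERIODIC = 2
        double precision :: dx(-buf:m + buf)
        integer :: i, bc_beg

        dx(0:m) = 1.0d0      ! dummy interior cell widths
        bc_beg = REFLECT     ! hypothetical BC selector at the beg face

        do i = 1, buf
            select case (bc_beg)
            case (EXTRAP)    ! copy the first interior width outward
                dx(-i) = dx(0)
            case (REFLECT)   ! mirror the widths about the face
                dx(-i) = dx(i - 1)
            case (PERIODIC)  ! wrap around from the opposite end
                dx(-i) = dx((m + 1) - i)
            end select
        end do
    end program dx_ghost_fill_sketch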
Symmetry BC at the beginning - elseif (bc_x%beg == BC_REFLECTIVE) then - - do i = 1, buff_size - dx(-i) = dx(i - 1) - end do - - ! Periodic BC at the beginning - elseif (bc_x%beg == BC_PERIODIC) then - - do i = 1, buff_size - dx(-i) = dx((m + 1) - i) - end do - - ! Processor BC at the beginning - else - - call s_mpi_sendrecv_grid_vars_buffer_regions('beg', 'x') - - end if - - do i = 1, offset_x%beg - x_cb(-1 - i) = x_cb(-i) - dx(-i) - end do - - do i = 1, buff_size - x_cc(-i) = x_cc(1 - i) - (dx(1 - i) + dx(-i))/2._wp - end do - - ! Ghost-cell extrapolation BC at the end - if (bc_x%end <= BC_GHOST_EXTRAP) then - - do i = 1, buff_size - dx(m + i) = dx(m) - end do - - ! Symmetry BC at the end - elseif (bc_x%end == BC_REFLECTIVE) then - - do i = 1, buff_size - dx(m + i) = dx((m + 1) - i) - end do - - ! Periodic BC at the end - elseif (bc_x%end == BC_PERIODIC) then - - do i = 1, buff_size - dx(m + i) = dx(i - 1) - end do - - ! Processor BC at the end - else - - call s_mpi_sendrecv_grid_vars_buffer_regions('end', 'x') - - end if - - do i = 1, offset_x%end - x_cb(m + i) = x_cb(m + (i - 1)) + dx(m + i) - end do - - do i = 1, buff_size - x_cc(m + i) = x_cc(m + (i - 1)) + (dx(m + (i - 1)) + dx(m + i))/2._wp - end do - - ! END: Populating Buffer Regions in the x-direction - - ! Populating Buffer Regions in the y-direction - - if (n > 0) then - - ! Ghost-cell extrapolation BC at the beginning - if (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_AXIS) then - - do i = 1, buff_size - dy(-i) = dy(0) - end do - - ! Symmetry BC at the beginning - elseif (bc_y%beg == BC_REFLECTIVE .or. bc_y%beg == BC_AXIS) then - - do i = 1, buff_size - dy(-i) = dy(i - 1) - end do - - ! Periodic BC at the beginning - elseif (bc_y%beg == BC_PERIODIC) then - - do i = 1, buff_size - dy(-i) = dy((n + 1) - i) - end do - - ! Processor BC at the beginning - else - - call s_mpi_sendrecv_grid_vars_buffer_regions('beg', 'y') - - end if - - do i = 1, offset_y%beg - y_cb(-1 - i) = y_cb(-i) - dy(-i) - end do - - do i = 1, buff_size - y_cc(-i) = y_cc(1 - i) - (dy(1 - i) + dy(-i))/2._wp - end do - - ! Ghost-cell extrapolation BC at the end - if (bc_y%end <= BC_GHOST_EXTRAP) then - - do i = 1, buff_size - dy(n + i) = dy(n) - end do - - ! Symmetry BC at the end - elseif (bc_y%end == BC_REFLECTIVE) then - - do i = 1, buff_size - dy(n + i) = dy((n + 1) - i) - end do - - ! Periodic BC at the end - elseif (bc_y%end == BC_PERIODIC) then - - do i = 1, buff_size - dy(n + i) = dy(i - 1) - end do - - ! Processor BC at the end - else - - call s_mpi_sendrecv_grid_vars_buffer_regions('end', 'y') - - end if - - do i = 1, offset_y%end - y_cb(n + i) = y_cb(n + (i - 1)) + dy(n + i) - end do - - do i = 1, buff_size - y_cc(n + i) = y_cc(n + (i - 1)) + (dy(n + (i - 1)) + dy(n + i))/2._wp - end do - - ! END: Populating Buffer Regions in the y-direction - - ! Populating Buffer Regions in the z-direction - - if (p > 0) then - - ! Ghost-cell extrapolation BC at the beginning - if (bc_z%beg <= BC_GHOST_EXTRAP) then - - do i = 1, buff_size - dz(-i) = dz(0) - end do - - ! Symmetry BC at the beginning - elseif (bc_z%beg == BC_REFLECTIVE) then - - do i = 1, buff_size - dz(-i) = dz(i - 1) - end do - - ! Periodic BC at the beginning - elseif (bc_z%beg == BC_PERIODIC) then - - do i = 1, buff_size - dz(-i) = dz((p + 1) - i) - end do - - ! 
Processor BC at the beginning - else - - call s_mpi_sendrecv_grid_vars_buffer_regions('beg', 'z') - - end if - - do i = 1, offset_z%beg - z_cb(-1 - i) = z_cb(-i) - dz(-i) - end do - - do i = 1, buff_size - z_cc(-i) = z_cc(1 - i) - (dz(1 - i) + dz(-i))/2._wp - end do - - ! Ghost-cell extrapolation BC at the end - if (bc_z%end <= BC_GHOST_EXTRAP) then - - do i = 1, buff_size - dz(p + i) = dz(p) - end do - - ! Symmetry BC at the end - elseif (bc_z%end == BC_REFLECTIVE) then - - do i = 1, buff_size - dz(p + i) = dz((p + 1) - i) - end do - - ! Periodic BC at the end - elseif (bc_z%end == BC_PERIODIC) then - - do i = 1, buff_size - dz(p + i) = dz(i - 1) - end do - - ! Processor BC at the end - else - - call s_mpi_sendrecv_grid_vars_buffer_regions('end', 'z') - - end if - - do i = 1, offset_z%end - z_cb(p + i) = z_cb(p + (i - 1)) + dz(p + i) - end do - - do i = 1, buff_size - z_cc(p + i) = z_cc(p + (i - 1)) + (dz(p + (i - 1)) + dz(p + i))/2._wp - end do - - end if - - end if - - ! END: Populating Buffer Regions in the z-direction - - end subroutine s_populate_grid_variables_buffer_regions - - !> The purpose of this procedure is to populate the buffers - !! of the cell-average conservative variables, depending on - !! the boundary conditions. - impure subroutine s_populate_conservative_variables_buffer_regions(q_particle) - - type(scalar_field), intent(inout), optional :: q_particle - - integer :: i, j, k !< Generic loop iterators - - ! Populating Buffer Regions in the x-direction - - ! Ghost-cell extrapolation BC at the beginning - if (bc_x%beg <= BC_GHOST_EXTRAP) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(-j, 0:n, 0:p) = & - q_particle%sf(0, 0:n, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(-j, 0:n, 0:p) = q_cons_vf(i)%sf(0, 0:n, 0:p) - end do - end if - end do - - ! Symmetry BC at the beginning - elseif (bc_x%beg == BC_REFLECTIVE) then - - do j = 1, buff_size - - if (present(q_particle)) then - q_particle%sf(-j, 0:n, 0:p) = & - q_particle%sf(j - 1, 0:n, 0:p) - else - ! Density or partial densities - do i = 1, cont_idx%end - q_cons_vf(i)%sf(-j, 0:n, 0:p) = & - q_cons_vf(i)%sf(j - 1, 0:n, 0:p) - end do - - ! x-component of momentum - q_cons_vf(mom_idx%beg)%sf(-j, 0:n, 0:p) = & - -q_cons_vf(mom_idx%beg)%sf(j - 1, 0:n, 0:p) - - ! Remaining momentum component(s), if any, as well as the - ! energy and the variable(s) from advection equation(s) - do i = mom_idx%beg + 1, sys_size - q_cons_vf(i)%sf(-j, 0:n, 0:p) = & - q_cons_vf(i)%sf(j - 1, 0:n, 0:p) - end do - end if - - end do - - ! Periodic BC at the beginning - elseif (bc_x%beg == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(-j, 0:n, 0:p) = & - q_particle%sf((m + 1) - j, 0:n, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(-j, 0:n, 0:p) = & - q_cons_vf(i)%sf((m + 1) - j, 0:n, 0:p) - end do - end if - end do - - ! Processor BC at the beginning - else - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'x', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'x') - end if - - end if - - ! Ghost-cell extrapolation BC at the end - if (bc_x%end <= BC_GHOST_EXTRAP) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(m + j, 0:n, 0:p) = & - q_particle%sf(m, 0:n, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(m + j, 0:n, 0:p) = & - q_cons_vf(i)%sf(m, 0:n, 0:p) - end do - end if - end do - - ! 
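    ! Illustrative sketch (hypothetical 1-D arrays standing in for q_cons_vf):
    ! the reflective branches in this removed routine mirror every field about
    ! the face and flip the sign of the face-normal momentum only, e.g. for a
    ! density/normal-momentum pair at the beg face:
    program reflective_fill_sketch
        implicit none
        integer, parameter :: m = 9, buf = 3
        double precision :: rho(-buf:m + buf), mom_x(-buf:m + buf)
        integer :: j

        rho(0:m) = 1.0d0
        mom_x(0:m) = 0.5d0

        do j = 1, buf
            rho(-j) = rho(j - 1)        ! mirrored copy
            mom_x(-j) = -mom_x(j - 1)   ! face-normal momentum changes sign
        end do
    end program reflective_fill_sketch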
Symmetry BC at the end - elseif (bc_x%end == BC_REFLECTIVE) then - - do j = 1, buff_size - - if (present(q_particle)) then - q_particle%sf(m + j, 0:n, 0:p) = & - q_particle%sf((m + 1) - j, 0:n, 0:p) - else - - ! Density or partial densities - do i = 1, cont_idx%end - q_cons_vf(i)%sf(m + j, 0:n, 0:p) = & - q_cons_vf(i)%sf((m + 1) - j, 0:n, 0:p) - end do - - ! x-component of momentum - q_cons_vf(mom_idx%beg)%sf(m + j, 0:n, 0:p) = & - -q_cons_vf(mom_idx%beg)%sf((m + 1) - j, 0:n, 0:p) - - ! Remaining momentum component(s), if any, as well as the - ! energy and the variable(s) from advection equation(s) - do i = mom_idx%beg + 1, sys_size - q_cons_vf(i)%sf(m + j, 0:n, 0:p) = & - q_cons_vf(i)%sf((m + 1) - j, 0:n, 0:p) - end do - end if - - end do - - ! Perodic BC at the end - elseif (bc_x%end == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(m + j, 0:n, 0:p) = & - q_particle%sf(j - 1, 0:n, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(m + j, 0:n, 0:p) = & - q_cons_vf(i)%sf(j - 1, 0:n, 0:p) - end do - end if - end do - - ! Processor BC at the end + if (bc_io) then + call s_read_parallel_boundary_condition_files(bc_type) else - - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'x', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'x') - end if - - end if - - ! END: Populating Buffer Regions in the x-direction - - ! Populating Buffer Regions in the y-direction - - if (n > 0) then - - ! Ghost-cell extrapolation BC at the beginning - if (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_AXIS) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, -j, 0:p) = q_particle%sf(:, 0, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, -j, 0:p) = q_cons_vf(i)%sf(:, 0, 0:p) - end do - end if - end do - - ! Axis BC at the beginning - elseif (bc_y%beg == BC_AXIS) then - - do j = 1, buff_size - do k = 0, p - if (z_cc(k) < pi) then - if (present(q_particle)) then - q_particle%sf(:, -j, k) = & - q_particle%sf(:, j - 1, k + ((p + 1)/2)) - else - do i = 1, mom_idx%beg - q_cons_vf(i)%sf(:, -j, k) = & - q_cons_vf(i)%sf(:, j - 1, k + ((p + 1)/2)) - end do - - q_cons_vf(mom_idx%beg + 1)%sf(:, -j, k) = & - -q_cons_vf(mom_idx%beg + 1)%sf(:, j - 1, k + ((p + 1)/2)) - - q_cons_vf(mom_idx%end)%sf(:, -j, k) = & - -q_cons_vf(mom_idx%end)%sf(:, j - 1, k + ((p + 1)/2)) - - do i = E_idx, sys_size - q_cons_vf(i)%sf(:, -j, k) = & - q_cons_vf(i)%sf(:, j - 1, k + ((p + 1)/2)) - end do - end if - else - if (present(q_particle)) then - q_particle%sf(:, -j, k) = & - q_particle%sf(:, j - 1, k - ((p + 1)/2)) - else - do i = 1, mom_idx%beg - q_cons_vf(i)%sf(:, -j, k) = & - q_cons_vf(i)%sf(:, j - 1, k - ((p + 1)/2)) - end do - - q_cons_vf(mom_idx%beg + 1)%sf(:, -j, k) = & - -q_cons_vf(mom_idx%beg + 1)%sf(:, j - 1, k - ((p + 1)/2)) - - q_cons_vf(mom_idx%end)%sf(:, -j, k) = & - -q_cons_vf(mom_idx%end)%sf(:, j - 1, k - ((p + 1)/2)) - - do i = E_idx, sys_size - q_cons_vf(i)%sf(:, -j, k) = & - q_cons_vf(i)%sf(:, j - 1, k - ((p + 1)/2)) - end do - end if - end if - end do - end do - - ! Symmetry BC at the beginning - elseif (bc_y%beg == BC_REFLECTIVE) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, -j, 0:p) = & - q_particle%sf(:, j - 1, 0:p) - else - ! Density or partial densities and x-momentum component - do i = 1, mom_idx%beg - q_cons_vf(i)%sf(:, -j, 0:p) = & - q_cons_vf(i)%sf(:, j - 1, 0:p) - end do - - ! 
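    ! Illustrative sketch (hypothetical size; assumes p+1 uniformly spaced
    ! azimuthal cells covering 2*pi, so the z_cc(k) < pi test above reduces to
    ! k < (p+1)/2): the BC_AXIS branch pairs each azimuthal cell with the cell
    ! half a revolution away and flips the radial and azimuthal momentum signs.
    ! The index pairing alone looks like:
    program axis_pairing_sketch
        implicit none
        integer, parameter :: p = 7          ! hypothetical last azimuthal index
        integer :: k, k_opp

        do k = 0, p
            if (k < (p + 1)/2) then
                k_opp = k + (p + 1)/2
            else
                k_opp = k - (p + 1)/2
            end if
            print '(A,I2,A,I2)', 'k = ', k, '  pairs with k_opp = ', k_opp
        end do
    end program axis_pairing_sketch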
y-component of momentum - q_cons_vf(mom_idx%beg + 1)%sf(:, -j, 0:p) = & - -q_cons_vf(mom_idx%beg + 1)%sf(:, j - 1, 0:p) - - ! Remaining z-momentum component, if any, as well as the - ! energy and variable(s) from advection equation(s) - do i = mom_idx%beg + 2, sys_size - q_cons_vf(i)%sf(:, -j, 0:p) = & - q_cons_vf(i)%sf(:, j - 1, 0:p) - end do - end if - - end do - - ! Periodic BC at the beginning - elseif (bc_y%beg == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, -j, 0:p) = & - q_particle%sf(:, (n + 1) - j, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, -j, 0:p) = & - q_cons_vf(i)%sf(:, (n + 1) - j, 0:p) - end do - end if - end do - - ! Processor BC at the beginning - else - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'y', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'y') - end if - - end if - - ! Ghost-cell extrapolation BC at the end - if (bc_y%end <= BC_GHOST_EXTRAP) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, n + j, 0:p) = & - q_particle%sf(:, n, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, n + j, 0:p) = & - q_cons_vf(i)%sf(:, n, 0:p) - end do - end if - end do - - ! Symmetry BC at the end - elseif (bc_y%end == BC_REFLECTIVE) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, n + j, 0:p) = & - q_particle%sf(:, (n + 1) - j, 0:p) - else - ! Density or partial densities and x-momentum component - do i = 1, mom_idx%beg - q_cons_vf(i)%sf(:, n + j, 0:p) = & - q_cons_vf(i)%sf(:, (n + 1) - j, 0:p) - end do - - ! y-component of momentum - q_cons_vf(mom_idx%beg + 1)%sf(:, n + j, 0:p) = & - -q_cons_vf(mom_idx%beg + 1)%sf(:, (n + 1) - j, 0:p) - - ! Remaining z-momentum component, if any, as well as the - ! energy and variable(s) from advection equation(s) - do i = mom_idx%beg + 2, sys_size - q_cons_vf(i)%sf(:, n + j, 0:p) = & - q_cons_vf(i)%sf(:, (n + 1) - j, 0:p) - end do - end if - - end do - - ! Perodic BC at the end - elseif (bc_y%end == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, n + j, 0:p) = & - q_particle%sf(:, j - 1, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, n + j, 0:p) = & - q_cons_vf(i)%sf(:, j - 1, 0:p) - end do - end if - end do - - ! Processor BC at the end - else - - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'y', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'y') - end if - - end if - - ! END: Populating Buffer Regions in the y-direction - - ! Populating Buffer Regions in the z-direction - - if (p > 0) then - - ! Ghost-cell extrapolation BC at the beginning - if (bc_z%beg <= BC_GHOST_EXTRAP) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, -j) = q_particle%sf(:, :, 0) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, :, -j) = q_cons_vf(i)%sf(:, :, 0) - end do - end if - end do - - ! Symmetry BC at the beginning - elseif (bc_z%beg == BC_REFLECTIVE) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, -j) = & - q_particle%sf(:, :, j - 1) - else - ! Density or the partial densities and the momentum - ! components in x- and y-directions - do i = 1, mom_idx%beg + 1 - q_cons_vf(i)%sf(:, :, -j) = & - q_cons_vf(i)%sf(:, :, j - 1) - end do - - ! 
z-component of momentum - q_cons_vf(mom_idx%end)%sf(:, :, -j) = & - -q_cons_vf(mom_idx%end)%sf(:, :, j - 1) - - ! Energy and advection equation(s) variable(s) - do i = E_idx, sys_size - q_cons_vf(i)%sf(:, :, -j) = & - q_cons_vf(i)%sf(:, :, j - 1) - end do - end if - - end do - - ! Periodic BC at the beginning - elseif (bc_z%beg == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, -j) = & - q_particle%sf(:, :, (p + 1) - j) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, :, -j) = & - q_cons_vf(i)%sf(:, :, (p + 1) - j) - end do - end if - end do - - ! Processor BC at the beginning - else - - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'z', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'z') - end if - - end if - - ! Ghost-cell extrapolation BC at the end - if (bc_z%end <= BC_GHOST_EXTRAP) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, p + j) = & - q_particle%sf(:, :, p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, :, p + j) = & - q_cons_vf(i)%sf(:, :, p) - end do - end if - end do - - ! Symmetry BC at the end - elseif (bc_z%end == BC_REFLECTIVE) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, p + j) = & - q_particle%sf(:, :, (p + 1) - j) - else - ! Density or the partial densities and the momentum - ! components in x- and y-directions - do i = 1, mom_idx%beg + 1 - q_cons_vf(i)%sf(:, :, p + j) = & - q_cons_vf(i)%sf(:, :, (p + 1) - j) - end do - - ! z-component of momentum - q_cons_vf(mom_idx%end)%sf(:, :, p + j) = & - -q_cons_vf(mom_idx%end)%sf(:, :, (p + 1) - j) - - ! Energy and advection equation(s) variable(s) - do i = E_idx, sys_size - q_cons_vf(i)%sf(:, :, p + j) = & - q_cons_vf(i)%sf(:, :, (p + 1) - j) - end do - end if - - end do - - ! Perodic BC at the end - elseif (bc_z%end == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, p + j) = & - q_particle%sf(:, :, j - 1) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, :, p + j) = & - q_cons_vf(i)%sf(:, :, j - 1) - end do - end if - end do - - ! Processor BC at the end - else - - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'z', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'z') - end if - - end if - - end if - + call s_assign_default_bc_type(bc_type) end if - ! END: Populating Buffer Regions in the z-direction +#endif - end subroutine s_populate_conservative_variables_buffer_regions + end subroutine s_read_parallel_data_files !> Computation of parameters, allocation procedures, and/or !! any other tasks needed to properly setup the module @@ -1313,7 +513,6 @@ impure subroutine s_initialize_data_input_module ! the simulation allocate (q_cons_vf(1:sys_size)) allocate (q_prim_vf(1:sys_size)) - if (bubbles_lagrange) allocate (q_particle(1)) ! Allocating the parts of the conservative and primitive variables ! 
that do require the direct knowledge of the dimensionality of the @@ -1340,12 +539,6 @@ impure subroutine s_initialize_data_input_module -buff_size:p + buff_size)) end if - if (bubbles_lagrange) then - allocate (q_particle(1)%sf(-buff_size:m + buff_size, & - -buff_size:n + buff_size, & - -buff_size:p + buff_size)) - end if - if (chemistry) then allocate (q_T_sf%sf(-buff_size:m + buff_size, & -buff_size:n + buff_size, & @@ -1370,12 +563,6 @@ impure subroutine s_initialize_data_input_module 0:0)) end if - if (bubbles_lagrange) then - allocate (q_particle(1)%sf(-buff_size:m + buff_size, & - -buff_size:n + buff_size, & - 0:0)) - end if - if (chemistry) then allocate (q_T_sf%sf(-buff_size:m + buff_size, & -buff_size:n + buff_size, & @@ -1399,16 +586,26 @@ impure subroutine s_initialize_data_input_module allocate (ib_markers%sf(-buff_size:m + buff_size, 0:0, 0:0)) end if - if (bubbles_lagrange) then - allocate (q_particle(1)%sf(-buff_size:m + buff_size, 0:0, 0:0)) - end if - if (chemistry) then allocate (q_T_sf%sf(-buff_size:m + buff_size, 0:0, 0:0)) end if end if + ! Allocating arrays to store the bc types + allocate (bc_type(1:num_dims, -1:1)) + + allocate (bc_type(1, -1)%sf(0:0, 0:n, 0:p)) + allocate (bc_type(1, 1)%sf(0:0, 0:n, 0:p)) + if (n > 0) then + allocate (bc_type(2, -1)%sf(-buff_size:m + buff_size, 0:0, 0:p)) + allocate (bc_type(2, 1)%sf(-buff_size:m + buff_size, 0:0, 0:p)) + if (p > 0) then + allocate (bc_type(3, -1)%sf(-buff_size:m + buff_size, -buff_size:n + buff_size, 0:0)) + allocate (bc_type(3, 1)%sf(-buff_size:m + buff_size, -buff_size:n + buff_size, 0:0)) + end if + end if + if (parallel_io .neqv. .true.) then s_read_data_files => s_read_serial_data_files else @@ -1435,15 +632,20 @@ impure subroutine s_finalize_data_input_module deallocate (ib_markers%sf) end if - if (bubbles_lagrange) then - deallocate (q_particle(1)%sf) - deallocate (q_particle) - end if - if (chemistry) then deallocate (q_T_sf%sf) end if + deallocate (bc_type(1, -1)%sf, bc_type(1, 1)%sf) + if (n > 0) then + deallocate (bc_type(2, -1)%sf, bc_type(2, 1)%sf) + if (p > 0) then + deallocate (bc_type(3, -1)%sf, bc_type(3, 1)%sf) + end if + end if + + deallocate (bc_type) + s_read_data_files => null() end subroutine s_finalize_data_input_module diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 2c93cca230..0f1c764670 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -124,6 +124,7 @@ module m_global_parameters type(int_bounds_info) :: mom_idx !< Indexes of first & last momentum eqns. integer :: E_idx !< Index of energy equation integer :: n_idx !< Index of number density + integer :: beta_idx !< Index of lagrange bubbles beta type(int_bounds_info) :: adv_idx !< Indexes of first & last advection eqns. type(int_bounds_info) :: internalEnergies_idx !< Indexes of first & last internal energy eqns. type(bub_bounds_info) :: bub_idx !< Indexes of first & last bubble variable eqns. @@ -147,6 +148,8 @@ module m_global_parameters ! Stands for "InDices With BUFFer". type(int_bounds_info) :: idwbuff(1:3) + integer :: num_bc_patches + logical :: bc_io !> @name Boundary conditions in the x-, y- and z-coordinate directions !> @{ type(int_bounds_info) :: bc_x, bc_y, bc_z @@ -370,6 +373,8 @@ contains bc_x%beg = dflt_int; bc_x%end = dflt_int bc_y%beg = dflt_int; bc_y%end = dflt_int bc_z%beg = dflt_int; bc_z%end = dflt_int + bc_io = .false. 
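    ! Illustrative sketch (hypothetical sizes; face_field is a local stand-in
    ! for the code's integer_field type, assumed to wrap an allocatable 3-D
    ! integer array named sf): the bc_type allocations added above give one
    ! field per (direction, face) pair, sized over the cells tangential to
    ! that face, with the tangential extents padded by the buffer where the
    ! buffers themselves need a type.
    program bc_type_layout_sketch
        implicit none

        type :: face_field
            integer, allocatable :: sf(:, :, :)
        end type face_field

        integer, parameter :: m = 10, n = 6, p = 4, buf = 2, num_dims = 3
        type(face_field) :: bc_type(1:num_dims, -1:1)

        ! x-faces: one value per (y, z) cell
        allocate (bc_type(1, -1)%sf(0:0, 0:n, 0:p))
        allocate (bc_type(1, 1)%sf(0:0, 0:n, 0:p))
        ! y-faces: one value per (x, z) cell, x extended by the buffer
        allocate (bc_type(2, -1)%sf(-buf:m + buf, 0:0, 0:p))
        allocate (bc_type(2, 1)%sf(-buf:m + buf, 0:0, 0:p))
        ! z-faces: one value per (x, y) cell, both extended by the buffer
        allocate (bc_type(3, -1)%sf(-buf:m + buf, -buf:n + buf, 0:0))
        allocate (bc_type(3, 1)%sf(-buf:m + buf, -buf:n + buf, 0:0))

        ! e.g. mark the whole x%beg face with one (hypothetical) flag value:
        bc_type(1, -1)%sf = -3
    end program bc_type_layout_sketch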
+ num_bc_patches = dflt_int #:for DIM in ['x', 'y', 'z'] #:for DIR in [1, 2, 3] @@ -468,7 +473,7 @@ contains integer :: i, j, fac ! Setting m_root equal to m in the case of a 1D serial simulation - if (num_procs == 1 .and. n == 0) m_root = m + if (n == 0) m_root = m_glb ! Gamma/Pi_inf Model if (model_eqns == 1) then @@ -586,6 +591,11 @@ contains end if + if (bubbles_lagrange) then + beta_idx = sys_size + 1 + sys_size = beta_idx + end if + if (mhd) then B_idx%beg = sys_size + 1 if (n == 0) then @@ -762,21 +772,12 @@ contains chemxe = species_idx%end #ifdef MFC_MPI - if (bubbles_lagrange) then - allocate (MPI_IO_DATA%view(1:sys_size + 1)) - allocate (MPI_IO_DATA%var(1:sys_size + 1)) - do i = 1, sys_size + 1 - allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) - MPI_IO_DATA%var(i)%sf => null() - end do - else - allocate (MPI_IO_DATA%view(1:sys_size)) - allocate (MPI_IO_DATA%var(1:sys_size)) - do i = 1, sys_size - allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) - MPI_IO_DATA%var(i)%sf => null() - end do - end if + allocate (MPI_IO_DATA%view(1:sys_size)) + allocate (MPI_IO_DATA%var(1:sys_size)) + do i = 1, sys_size + allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) + MPI_IO_DATA%var(i)%sf => null() + end do if (ib) allocate (MPI_IO_IB_DATA%var%sf(0:m, 0:n, 0:p)) #endif @@ -924,21 +925,18 @@ contains integer :: i ! Deallocating the grid variables for the x-coordinate direction - deallocate (x_cb, x_cc, dx) + deallocate (x_cc, x_cb, dx) ! Deallocating grid variables for the y- and z-coordinate directions if (n > 0) then - - deallocate (y_cb, y_cc, dy) - - if (p > 0) deallocate (z_cb, z_cc, dz) - + deallocate (y_cc, y_cb, dy) + if (p > 0) then + deallocate (z_cc, z_cb, dz) + end if + else ! Deallocating the grid variables, only used for the 1D simulations, ! and containing the defragmented computational domain grid data - else - deallocate (x_root_cb, x_root_cc) - end if deallocate (proc_coords) @@ -953,8 +951,6 @@ contains MPI_IO_DATA%var(i)%sf => null() end do - if (bubbles_lagrange) MPI_IO_DATA%var(sys_size + 1)%sf => null() - deallocate (MPI_IO_DATA%var) deallocate (MPI_IO_DATA%view) end if diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index 2e693488f1..effe0b9bb3 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -23,14 +23,6 @@ module m_mpi_proxy implicit none - !> @name Buffers of the conservative variables received/sent from/to neighboring - !! processors. Note that these variables are structured as vectors rather - !! than arrays. - !> @{ - real(wp), allocatable, dimension(:) :: q_cons_buffer_in - real(wp), allocatable, dimension(:) :: q_cons_buffer_out - !> @} - !> @name Receive counts and displacement vector variables, respectively, used in !! enabling MPI to gather varying amounts of data from all processes to the !! root process @@ -41,7 +33,7 @@ module m_mpi_proxy !> @name Generic flags used to identify and report MPI errors !> @{ - integer, private :: err_code, ierr + integer, private :: ierr !> @} contains @@ -54,61 +46,6 @@ contains integer :: i !< Generic loop iterator - ! Allocating vectorized buffer regions of conservative variables. - ! The length of buffer vectors are set according to the size of the - ! largest buffer region in the sub-domain. - if (buff_size > 0) then - - ! Simulation is at least 2D - if (n > 0) then - - ! 
Simulation is 3D - if (p > 0) then - - allocate (q_cons_buffer_in(0:buff_size* & - sys_size* & - (m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)* & - (p + 2*buff_size + 1)/ & - (min(m, n, p) & - + 2*buff_size + 1) - 1)) - allocate (q_cons_buffer_out(0:buff_size* & - sys_size* & - (m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)* & - (p + 2*buff_size + 1)/ & - (min(m, n, p) & - + 2*buff_size + 1) - 1)) - - ! Simulation is 2D - else - - allocate (q_cons_buffer_in(0:buff_size* & - sys_size* & - (max(m, n) & - + 2*buff_size + 1) - 1)) - allocate (q_cons_buffer_out(0:buff_size* & - sys_size* & - (max(m, n) & - + 2*buff_size + 1) - 1)) - - end if - - ! Simulation is 1D - else - - allocate (q_cons_buffer_in(0:buff_size*sys_size - 1)) - allocate (q_cons_buffer_out(0:buff_size*sys_size - 1)) - - end if - - ! Initially zeroing out the vectorized buffer region variables - ! to avoid possible underflow from any unused allocated memory - q_cons_buffer_in = 0._wp - q_cons_buffer_out = 0._wp - - end if - ! Allocating and configuring the receive counts and the displacement ! vector variables used in variable-gather communication procedures. ! Note that these are only needed for either multidimensional runs @@ -201,1295 +138,6 @@ contains end subroutine s_mpi_bcast_user_inputs - !> This subroutine takes care of efficiently distributing - !! the computational domain among the available processors - !! as well as recomputing some of the global parameters so - !! that they reflect the configuration of sub-domain that - !! is overseen by the local processor. - impure subroutine s_mpi_decompose_computational_domain - -#ifdef MFC_MPI - - ! # of processors in the x-, y- and z-coordinate directions - integer :: num_procs_x, num_procs_y, num_procs_z - - ! Temporary # of processors in x-, y- and z-coordinate directions - ! used during the processor factorization optimization procedure - real(wp) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z - - ! Processor factorization (fct) minimization parameter - real(wp) :: fct_min - - ! Cartesian processor topology communicator - integer :: MPI_COMM_CART - - ! Number of remaining cells for a particular coordinate direction - ! after the bulk has evenly been distributed among the available - ! processors for that coordinate direction - integer :: rem_cells - - ! Generic loop iterators - integer :: i, j - - if (num_procs == 1 .and. parallel_io) then - do i = 1, num_dims - start_idx(i) = 0 - end do - return - end if - - ! Performing the computational domain decomposition. The procedure - ! is optimized by ensuring that each processor contains a close to - ! equivalent piece of the computational domain. Note that explicit - ! type-casting is omitted here for code legibility purposes. - - ! Generating 3D Cartesian Processor Topology - - if (n > 0) then - - if (p > 0) then - - if (cyl_coord .and. p > 0) then - ! Implement pencil processor blocking if using cylindrical coordinates so - ! that all cells in azimuthal direction are stored on a single processor. - ! This is necessary for efficient application of Fourier filter near axis. - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - num_procs_z = 1 - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! 
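    ! Illustrative sketch (hypothetical inputs; the real code requires at least
    ! num_stcls_min*weno_order cells per rank): the divisor search removed here,
    ! and its counterpart in the common decomposition routine earlier in this
    ! diff, keeps the factorization that best balances cells per rank between
    ! directions while respecting the minimum stencil width.
    program proc_factorization_sketch
        implicit none
        integer :: num_procs, m, n, min_cells
        integer :: i, px, py, best_px, best_py
        real :: fct, fct_min

        num_procs = 12; m = 199; n = 99; min_cells = 10
        best_px = 0; best_py = 0
        fct_min = huge(fct_min)

        do i = 1, num_procs
            if (mod(num_procs, i) /= 0) cycle
            px = i
            py = num_procs/i
            if ((m + 1)/px < min_cells .or. (n + 1)/py < min_cells) cycle

            ! Imbalance measure: difference in cells per rank between directions
            fct = abs(real((m + 1)/px) - real((n + 1)/py))
            if (fct < fct_min) then
                fct_min = fct
                best_px = px
                best_py = py
            end if
        end do

        if (best_px == 0) then
            print '(A)', 'No valid decomposition for these inputs.'
        else
            print '(A,I3,A,I3)', 'chosen topology: ', best_px, ' x ', best_py
        end if
    end program proc_factorization_sketch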
Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - else - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = 1 - num_procs_z = num_procs - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + 10._wp*abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - do j = 1, (num_procs/i) - - if (mod(num_procs/i, j) == 0 & - .and. & - (n + 1)/j >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = j - tmp_num_procs_z = num_procs/(i*j) - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) & - .and. & - (p + 1)/tmp_num_procs_z & - >= & - num_stcls_min*weno_order) & - then - - num_procs_x = i - num_procs_y = j - num_procs_z = num_procs/(i*j) - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - ierr = 0 - - end if - - end if - - end do - - end if - - end do - - end if - - ! Checking whether the decomposition of the computational - ! domain was successful - if (proc_rank == 0 .and. ierr == -1) then - print '(A)', 'Unable to decompose computational '// & - 'domain for selected number of '// & - 'processors. Exiting.' - call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) - end if - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, & - num_procs_y, num_procs_z/), & - (/.true., .true., .true./), & - .false., MPI_COMM_CART, ierr) - - ! Finding corresponding Cartesian coordinates of the local - ! processor rank in newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, & - proc_coords, ierr) - - ! END: Generating 3D Cartesian Processor Topology - - ! Sub-domain Global Parameters in z-direction - - ! Number of remaining cells after majority is distributed - rem_cells = mod(p + 1, num_procs_z) - - ! Optimal number of cells per processor - p = (p + 1)/num_procs_z - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(3) == i - 1) then - p = p + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(3) > 0 .or. bc_z%beg == BC_PERIODIC) then - proc_coords(3) = proc_coords(3) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%beg, ierr) - proc_coords(3) = proc_coords(3) + 1 - end if - - ! Ghost zone at the beginning - if (proc_coords(3) > 0 .and. format == 1) then - offset_z%beg = 2 - else - offset_z%beg = 0 - end if - - ! Boundary condition at the end - if (proc_coords(3) < num_procs_z - 1 .or. 
bc_z%end == BC_PERIODIC) then - proc_coords(3) = proc_coords(3) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%end, ierr) - proc_coords(3) = proc_coords(3) - 1 - end if - - ! Ghost zone at the end - if (proc_coords(3) < num_procs_z - 1 .and. format == 1) then - offset_z%end = 2 - else - offset_z%end = 0 - end if - - if (parallel_io) then - if (proc_coords(3) < rem_cells) then - start_idx(3) = (p + 1)*proc_coords(3) - else - start_idx(3) = (p + 1)*proc_coords(3) + rem_cells - end if - end if - - ! Generating 2D Cartesian Processor Topology - - else - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - ! Checking whether the decomposition of the computational - ! domain was successful - if (proc_rank == 0 .and. ierr == -1) then - print '(A)', 'Unable to decompose computational '// & - 'domain for selected number of '// & - 'processors. Exiting.' - call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) - end if - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, & - num_procs_y/), (/.true., & - .true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding corresponding Cartesian coordinates of the local - ! processor rank in newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, & - proc_coords, ierr) - - end if - - ! END: Generating 2D Cartesian Processor Topology - - ! Sub-domain Global Parameters in y-direction - - ! Number of remaining cells after majority has been distributed - rem_cells = mod(n + 1, num_procs_y) - - ! Optimal number of cells per processor - n = (n + 1)/num_procs_y - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(2) == i - 1) then - n = n + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(2) > 0 .or. bc_y%beg == BC_PERIODIC) then - proc_coords(2) = proc_coords(2) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%beg, ierr) - proc_coords(2) = proc_coords(2) + 1 - end if - - ! Ghost zone at the beginning - if (proc_coords(2) > 0 .and. format == 1) then - offset_y%beg = 2 - else - offset_y%beg = 0 - end if - - ! Boundary condition at the end - if (proc_coords(2) < num_procs_y - 1 .or. bc_y%end == BC_PERIODIC) then - proc_coords(2) = proc_coords(2) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%end, ierr) - proc_coords(2) = proc_coords(2) - 1 - end if - - ! Ghost zone at the end - if (proc_coords(2) < num_procs_y - 1 .and. 
format == 1) then - offset_y%end = 2 - else - offset_y%end = 0 - end if - - if (parallel_io) then - if (proc_coords(2) < rem_cells) then - start_idx(2) = (n + 1)*proc_coords(2) - else - start_idx(2) = (n + 1)*proc_coords(2) + rem_cells - end if - end if - - ! Generating 1D Cartesian Processor Topology - - else - - ! Number of processors in the coordinate direction is equal to - ! the total number of processors available - num_procs_x = num_procs - - ! Number of cells in undecomposed computational domain needed - ! for sub-domain reassembly during formatted data output - m_root = m - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), & - (/.true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding the corresponding Cartesian coordinates of the local - ! processor rank in the newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, & - proc_coords, ierr) - - end if - - ! Sub-domain Global Parameters in x-direction - - ! Number of remaining cells after majority has been distributed - rem_cells = mod(m + 1, num_procs_x) - - ! Optimal number of cells per processor - m = (m + 1)/num_procs_x - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(1) == i - 1) then - m = m + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(1) > 0 .or. bc_x%beg == BC_PERIODIC) then - proc_coords(1) = proc_coords(1) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) - proc_coords(1) = proc_coords(1) + 1 - end if - - ! Ghost zone at the beginning - if (proc_coords(1) > 0 .and. format == 1 .and. n > 0) then - offset_x%beg = 2 - else - offset_x%beg = 0 - end if - - ! Boundary condition at the end - if (proc_coords(1) < num_procs_x - 1 .or. bc_x%end == BC_PERIODIC) then - proc_coords(1) = proc_coords(1) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) - proc_coords(1) = proc_coords(1) - 1 - end if - - ! Ghost zone at the end - if (proc_coords(1) < num_procs_x - 1 .and. format == 1 .and. n > 0) then - offset_x%end = 2 - else - offset_x%end = 0 - end if - - if (parallel_io) then - if (proc_coords(1) < rem_cells) then - start_idx(1) = (m + 1)*proc_coords(1) - else - start_idx(1) = (m + 1)*proc_coords(1) + rem_cells - end if - end if - -#endif - - end subroutine s_mpi_decompose_computational_domain - - !> Communicates the buffer regions associated with the grid - !! variables with processors in charge of the neighboring - !! sub-domains. Note that only cell-width spacings feature - !! buffer regions so that no information relating to the - !! cell-boundary locations is communicated. - !! @param pbc_loc Processor boundary condition (PBC) location - !! @param sweep_coord Coordinate direction normal to the processor boundary - impure subroutine s_mpi_sendrecv_grid_vars_buffer_regions(pbc_loc, sweep_coord) - - character(LEN=3), intent(in) :: pbc_loc - character, intent(in) :: sweep_coord - -#ifdef MFC_MPI - - ! Communications in the x-direction - - if (sweep_coord == 'x') then - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_x%end >= 0) then - - ! Sending/receiving the data to/from bc_x%end/bc_x%beg - call MPI_SENDRECV(dx(m - buff_size + 1), buff_size, & - mpi_p, bc_x%end, 0, & - dx(-buff_size), buff_size, & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! 
Sending/receiving the data to/from bc_x%beg/bc_x%beg - call MPI_SENDRECV(dx(0), buff_size, & - mpi_p, bc_x%beg, 1, & - dx(-buff_size), buff_size, & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_x%beg >= 0) then - - ! Sending/receiving the data to/from bc_x%beg/bc_x%end - call MPI_SENDRECV(dx(0), buff_size, & - mpi_p, bc_x%beg, 1, & - dx(m + 1), buff_size, & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Sending/receiving the data to/from bc_x%end/bc_x%end - call MPI_SENDRECV(dx(m - buff_size + 1), buff_size, & - mpi_p, bc_x%end, 0, & - dx(m + 1), buff_size, & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - end if - - ! END: Communications in the x-direction - - ! Communications in the y-direction - - elseif (sweep_coord == 'y') then - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_y%end >= 0) then - - ! Sending/receiving the data to/from bc_y%end/bc_y%beg - call MPI_SENDRECV(dy(n - buff_size + 1), buff_size, & - mpi_p, bc_y%end, 0, & - dy(-buff_size), buff_size, & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! Sending/receiving the data to/from bc_y%beg/bc_y%beg - call MPI_SENDRECV(dy(0), buff_size, & - mpi_p, bc_y%beg, 1, & - dy(-buff_size), buff_size, & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_y%beg >= 0) then - - ! Sending/receiving the data to/from bc_y%beg/bc_y%end - call MPI_SENDRECV(dy(0), buff_size, & - mpi_p, bc_y%beg, 1, & - dy(n + 1), buff_size, & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Sending/receiving the data to/from bc_y%end/bc_y%end - call MPI_SENDRECV(dy(n - buff_size + 1), buff_size, & - mpi_p, bc_y%end, 0, & - dy(n + 1), buff_size, & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - end if - - ! END: Communications in the y-direction - - ! Communications in the z-direction - - else - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_z%end >= 0) then - - ! Sending/receiving the data to/from bc_z%end/bc_z%beg - call MPI_SENDRECV(dz(p - buff_size + 1), buff_size, & - mpi_p, bc_z%end, 0, & - dz(-buff_size), buff_size, & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! Sending/receiving the data to/from bc_z%beg/bc_z%beg - call MPI_SENDRECV(dz(0), buff_size, & - mpi_p, bc_z%beg, 1, & - dz(-buff_size), buff_size, & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_z%beg >= 0) then - - ! Sending/receiving the data to/from bc_z%beg/bc_z%end - call MPI_SENDRECV(dz(0), buff_size, & - mpi_p, bc_z%beg, 1, & - dz(p + 1), buff_size, & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! 
Sending/receiving the data to/from bc_z%end/bc_z%end - call MPI_SENDRECV(dz(p - buff_size + 1), buff_size, & - mpi_p, bc_z%end, 0, & - dz(p + 1), buff_size, & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - end if - - end if - - ! END: Communications in the z-direction - -#endif - - end subroutine s_mpi_sendrecv_grid_vars_buffer_regions - - !> Communicates buffer regions associated with conservative - !! variables with processors in charge of the neighboring - !! sub-domains - !! @param q_cons_vf Conservative variables - !! @param pbc_loc Processor boundary condition (PBC) location - !! @param sweep_coord Coordinate direction normal to the processor boundary - !! @param q_particle Projection of the lagrangian particles in the Eulerian framework - impure subroutine s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, pbc_loc, & - sweep_coord, q_particle) - - type(scalar_field), & - dimension(sys_size), & - intent(inout) :: q_cons_vf - - character(LEN=3), intent(in) :: pbc_loc - - character, intent(in) :: sweep_coord - - type(scalar_field), & - intent(inout), optional :: q_particle - -#ifdef MFC_MPI - - integer :: i, j, k, l, r !< Generic loop iterators - - ! Communications in the x-direction - - if (sweep_coord == 'x') then - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_x%end >= 0) then - - ! Packing the data to be sent to bc_x%end - do l = 0, p - do k = 0, n - do j = m - buff_size + 1, m - do i = 1, sys_size - r = sys_size*(j - m + buff_size - 1) & - + sys_size*buff_size*k + (i - 1) & - + sys_size*buff_size*(n + 1)*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_x%end/bc_x%beg - call MPI_SENDRECV(q_cons_buffer_out(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%end, 0, & - q_cons_buffer_in(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! Packing the data to be sent to bc_x%beg - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, sys_size - r = (i - 1) + sys_size*j & - + sys_size*buff_size*k & - + sys_size*buff_size*(n + 1)*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_x%beg/bc_x%beg - call MPI_SENDRECV(q_cons_buffer_out(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%beg, 1, & - q_cons_buffer_in(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data received from bc_x%beg - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*buff_size*k + (i - 1) & - + sys_size*buff_size*(n + 1)*l - if (present(q_particle)) then - q_particle%sf(j, k, l) = q_cons_buffer_in(r) - else - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) - end if -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - else ! Buffer region at the end - - ! 
PBC at both ends of the sub-domain - if (bc_x%beg >= 0) then - - ! Packing the data to be sent to bc_x%beg - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, sys_size - r = (i - 1) + sys_size*j & - + sys_size*buff_size*k & - + sys_size*buff_size*(n + 1)*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - call MPI_SENDRECV(q_cons_buffer_out(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%beg, 1, & - q_cons_buffer_in(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Packing the data to be sent to bc_x%end - do l = 0, p - do k = 0, n - do j = m - buff_size + 1, m - do i = 1, sys_size - r = sys_size*(j - m + buff_size - 1) & - + sys_size*buff_size*k + (i - 1) & - + sys_size*buff_size*(n + 1)*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - call MPI_SENDRECV(q_cons_buffer_out(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%end, 0, & - q_cons_buffer_in(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data received from bc_x%end - do l = 0, p - do k = 0, n - do j = m + 1, m + buff_size - do i = 1, sys_size - r = (i - 1) + sys_size*(j - m - 1) & - + sys_size*buff_size*k & - + sys_size*buff_size*(n + 1)*l - if (present(q_particle)) then - q_particle%sf(j, k, l) = q_cons_buffer_in(r) - else - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) - end if -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - end if - - ! END: Communications in the x-direction - - ! Communications in the y-direction - - elseif (sweep_coord == 'y') then - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_y%end >= 0) then - - ! Packing the data to be sent to bc_y%end - do l = 0, p - do k = n - buff_size + 1, n - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k - n + buff_size - 1) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - buff_size*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_y%end/bc_y%beg - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (p + 1), mpi_p, & - bc_y%end, 0, q_cons_buffer_in(0), & - buff_size*sys_size* & - (m + 2*buff_size + 1)*(p + 1), & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! 
Packing the data to be sent to bc_y%beg - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)*k & - + sys_size*(m + 2*buff_size + 1)* & - buff_size*l + (i - 1) - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_y%beg/bc_y%beg - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (p + 1), mpi_p, & - bc_y%beg, 1, q_cons_buffer_in(0), & - buff_size*sys_size* & - (m + 2*buff_size + 1)*(p + 1), & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data received from bc_y%beg - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = (i - 1) + sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + sys_size* & - (m + 2*buff_size + 1)*buff_size*l - if (present(q_particle)) then - q_particle%sf(j, k, l) = q_cons_buffer_in(r) - else - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) - end if -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_y%beg >= 0) then - - ! Packing the data to be sent to bc_y%beg - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)*k & - + sys_size*(m + 2*buff_size + 1)* & - buff_size*l + (i - 1) - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_y%beg/bc_y%end - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (p + 1), mpi_p, & - bc_y%beg, 1, q_cons_buffer_in(0), & - buff_size*sys_size* & - (m + 2*buff_size + 1)*(p + 1), & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Packing the data to be sent to bc_y%end - do l = 0, p - do k = n - buff_size + 1, n - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k - n + buff_size - 1) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - buff_size*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_y%end/bc_y%end - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (p + 1), mpi_p, & - bc_y%end, 0, q_cons_buffer_in(0), & - buff_size*sys_size* & - (m + 2*buff_size + 1)*(p + 1), & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! 
Unpacking the data received form bc_y%end - do l = 0, p - do k = n + 1, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = (i - 1) + sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k - n - 1) + sys_size* & - (m + 2*buff_size + 1)*buff_size*l - if (present(q_particle)) then - q_particle%sf(j, k, l) = q_cons_buffer_in(r) - else - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) - end if -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - end if - - ! END: Communications in the y-direction - - ! Communications in the z-direction - - else - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_z%end >= 0) then - - ! Packing the data to be sent to bc_z%end - do l = p - buff_size + 1, p - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + sys_size* & - (m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)* & - (l - p + buff_size - 1) + (i - 1) - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_z%end/bc_z%beg - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%end, 0, & - q_cons_buffer_in(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! Packing the data to be sent to bc_z%beg - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_z%beg/bc_z%beg - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%beg, 1, & - q_cons_buffer_in(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data from bc_z%beg - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)*(l + buff_size) - if (present(q_particle)) then - q_particle%sf(j, k, l) = q_cons_buffer_in(r) - else - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) - end if -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_z%beg >= 0) then - - ! 
Packing the data to be sent to bc_z%beg - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_z%beg/bc_z%end - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%beg, 1, & - q_cons_buffer_in(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Packing the data to be sent to bc_z%end - do l = p - buff_size + 1, p - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + sys_size* & - (m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)* & - (l - p + buff_size - 1) + (i - 1) - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_z%end/bc_z%end - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%end, 0, & - q_cons_buffer_in(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data received from bc_z%end - do l = p + 1, p + buff_size - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)*(l - p - 1) - if (present(q_particle)) then - q_particle%sf(j, k, l) = q_cons_buffer_in(r) - else - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) - end if -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - end if - - end if - - ! END: Communications in the z-direction - -#endif - - end subroutine s_mpi_sendrecv_cons_vars_buffer_regions - !> This subroutine gathers the Silo database metadata for !! the spatial extents in order to boost the performance of !! the multidimensional visualization. @@ -1654,10 +302,7 @@ contains impure subroutine s_mpi_gather_data_extents(q_sf, data_extents) real(wp), dimension(:, :, :), intent(in) :: q_sf - - real(wp), & - dimension(1:2, 0:num_procs - 1), & - intent(inout) :: data_extents + real(wp), dimension(1:2, 0:num_procs - 1), intent(inout) :: data_extents #ifdef MFC_MPI @@ -1683,13 +328,8 @@ contains !! 
@param q_root_sf Flow variable defined on the entire computational domain impure subroutine s_mpi_defragment_1d_flow_variable(q_sf, q_root_sf) - real(wp), & - dimension(0:m), & - intent(in) :: q_sf - - real(wp), & - dimension(0:m), & - intent(inout) :: q_root_sf + real(wp), dimension(0:m), intent(in) :: q_sf + real(wp), dimension(0:m), intent(inout) :: q_root_sf #ifdef MFC_MPI @@ -1709,12 +349,6 @@ contains #ifdef MFC_MPI - ! Deallocating the conservative variables buffer vectors - if (buff_size > 0) then - deallocate (q_cons_buffer_in) - deallocate (q_cons_buffer_out) - end if - ! Deallocating the receive counts and the displacement vector ! variables used in variable-gather communication procedures if ((format == 1 .and. n > 0) .or. n == 0) then diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index e7435f6ad6..0b9435b0b8 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -16,6 +16,10 @@ module m_start_up use m_mpi_proxy !< Message passing interface (MPI) module proxy + use m_mpi_common !< Common MPI subroutines + + use m_boundary_common !< Common boundary conditions subroutines + use m_variables_conversion !< Subroutines to change the state variables from !! one form to another @@ -84,7 +88,8 @@ impure subroutine s_read_input_file relax_model, cf_wrt, sigma, adv_n, ib, num_ibs, & cfl_adap_dt, cfl_const_dt, t_save, t_stop, n_start, & cfl_target, surface_tension, bubbles_lagrange, & - sim_data, hyperelasticity, Bx0, relativity, cont_damage + sim_data, hyperelasticity, Bx0, relativity, cont_damage, & + num_bc_patches ! Inquiring the status of the post_process.inp file file_loc = 'post_process.inp' @@ -115,6 +120,11 @@ impure subroutine s_read_input_file if (cfl_adap_dt .or. cfl_const_dt) cfl_dt = .true. + if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == -17) .or. & + num_bc_patches > 0) then + bc_io = .true. + end if + else call s_mpi_abort('File post_process.inp is missing. Exiting.') end if @@ -170,15 +180,10 @@ impure subroutine s_perform_time_step(t_step) ! Populating the grid and conservative variables call s_read_data_files(t_step) - ! Populating the buffer regions of the grid variables + ! Populating the buffer regions of the grid and conservative variables if (buff_size > 0) then - call s_populate_grid_variables_buffer_regions() - end if - - ! Populating the buffer regions of the conservative variables - if (buff_size > 0) then - call s_populate_conservative_variables_buffer_regions() - if (bubbles_lagrange) call s_populate_conservative_variables_buffer_regions(q_particle(1)) + call s_populate_grid_variables_buffers() + call s_populate_variables_buffers(bc_type, q_cons_vf) end if ! Initialize the Temperature cache. @@ -650,7 +655,7 @@ impure subroutine s_save_data(t_step, varname, pres, c, H) ! Adding the lagrangian subgrid variables to the formatted database file if (bubbles_lagrange) then !! Void fraction field - q_sf = 1._wp - q_particle(1)%sf( & + q_sf = 1._wp - q_cons_vf(beta_idx)%sf( & -offset_x%beg:m + offset_x%end, & -offset_y%beg:n + offset_y%end, & -offset_z%beg:p + offset_z%end) @@ -681,7 +686,11 @@ impure subroutine s_initialize_modules if (bubbles_euler .and. .not. 
polytropic) then call s_initialize_nonpoly() end if - if (num_procs > 1) call s_initialize_mpi_proxy_module() + if (num_procs > 1) then + call s_initialize_mpi_proxy_module() + call s_initialize_mpi_common_module() + end if + call s_initialize_boundary_common_module() call s_initialize_variables_conversion_module() call s_initialize_data_input_module() call s_initialize_derived_variables_module() @@ -734,7 +743,10 @@ impure subroutine s_finalize_modules call s_finalize_derived_variables_module() call s_finalize_data_input_module() call s_finalize_variables_conversion_module() - if (num_procs > 1) call s_finalize_mpi_proxy_module() + if (num_procs > 1) then + call s_finalize_mpi_proxy_module() + call s_finalize_mpi_common_module() + end if call s_finalize_global_parameters_module() ! Finalizing the MPI environment diff --git a/src/pre_process/m_boundary_conditions.fpp b/src/pre_process/m_boundary_conditions.fpp index 4fc2e5ff03..6ce2c07c24 100644 --- a/src/pre_process/m_boundary_conditions.fpp +++ b/src/pre_process/m_boundary_conditions.fpp @@ -24,9 +24,7 @@ module m_boundary_conditions real(wp) :: radius type(bounds_info) :: x_boundary, y_boundary, z_boundary !< - private; public :: s_apply_boundary_patches, & - s_write_serial_boundary_condition_files, & - s_write_parallel_boundary_condition_files + private; public :: s_apply_boundary_patches contains impure subroutine s_line_segment_bc(patch_id, bc_type) @@ -264,142 +262,4 @@ contains end subroutine s_apply_boundary_patches - impure subroutine s_write_serial_boundary_condition_files(q_prim_vf, bc_type, step_dirpath) - - type(scalar_field), dimension(sys_size) :: q_prim_vf - type(integer_field), dimension(1:num_dims, -1:1) :: bc_type - - character(LEN=*), intent(in) :: step_dirpath - - integer :: dir, loc - character(len=path_len) :: file_path - - character(len=10) :: status - - if (old_grid) then - status = 'old' - else - status = 'new' - end if - - call s_pack_boundary_condition_buffers(q_prim_vf) - - file_path = trim(step_dirpath)//'/bc_type.dat' - open (1, FILE=trim(file_path), FORM='unformatted', STATUS=status) - do dir = 1, num_dims - do loc = -1, 1, 2 - write (1) bc_type(dir, loc)%sf - end do - end do - close (1) - - file_path = trim(step_dirpath)//'/bc_buffers.dat' - open (1, FILE=trim(file_path), FORM='unformatted', STATUS=status) - do dir = 1, num_dims - do loc = -1, 1, 2 - write (1) bc_buffers(dir, loc)%sf - end do - end do - close (1) - - end subroutine s_write_serial_boundary_condition_files - - impure subroutine s_write_parallel_boundary_condition_files(q_prim_vf, bc_type) - - type(scalar_field), dimension(sys_size) :: q_prim_vf - type(integer_field), dimension(1:num_dims, -1:1) :: bc_type - - integer :: dir, loc - character(len=path_len) :: file_loc, file_path - -#ifdef MFC_MPI - integer :: ierr - integer :: file_id - integer :: offset - character(len=7) :: proc_rank_str - logical :: dir_check - - call s_pack_boundary_condition_buffers(q_prim_vf) - - file_loc = trim(case_dir)//'/restart_data/boundary_conditions' - if (proc_rank == 0) then - call my_inquire(file_loc, dir_check) - if (dir_check .neqv. .true.) then - call s_create_directory(trim(file_loc)) - end if - end if - - call s_create_mpi_types(bc_type) - - call s_mpi_barrier() - - call DelayFileAccess(proc_rank) - - write (proc_rank_str, '(I7.7)') proc_rank - file_path = trim(file_loc)//'/bc_'//trim(proc_rank_str)//'.dat' - call MPI_File_open(MPI_COMM_SELF, trim(file_path), MPI_MODE_CREATE + MPI_MODE_WRONLY, MPI_INFO_NULL, file_id, ierr) - - offset = 0 - - ! 
Write bc_types - do dir = 1, num_dims - do loc = -1, 1, 2 - call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), MPI_INTEGER, MPI_BC_TYPE_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) - call MPI_File_write_all(file_id, bc_type(dir, loc)%sf, 1, MPI_BC_TYPE_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) - offset = offset + sizeof(bc_type(dir, loc)%sf) - end do - end do - - ! Write bc_buffers - do dir = 1, num_dims - do loc = -1, 1, 2 - call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_p, MPI_BC_BUFFER_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) - call MPI_File_write_all(file_id, bc_buffers(dir, loc)%sf, 1, MPI_BC_BUFFER_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) - offset = offset + sizeof(bc_buffers(dir, loc)%sf) - end do - end do - - call MPI_File_close(file_id, ierr) -#endif - - end subroutine s_write_parallel_boundary_condition_files - - impure subroutine s_pack_boundary_condition_buffers(q_prim_vf) - - type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf - integer :: i, j, k - - do k = 0, p - do j = 0, n - do i = 1, sys_size - bc_buffers(1, -1)%sf(i, j, k) = q_prim_vf(i)%sf(0, j, k) - bc_buffers(1, 1)%sf(i, j, k) = q_prim_vf(i)%sf(m, j, k) - end do - end do - end do - - if (n > 0) then - do k = 0, p - do j = 1, sys_size - do i = 0, m - bc_buffers(2, -1)%sf(i, j, k) = q_prim_vf(j)%sf(i, 0, k) - bc_buffers(2, 1)%sf(i, j, k) = q_prim_vf(j)%sf(i, n, k) - end do - end do - end do - - if (p > 0) then - do k = 1, sys_size - do j = 0, n - do i = 0, m - bc_buffers(3, -1)%sf(i, j, k) = q_prim_vf(k)%sf(i, j, 0) - bc_buffers(3, 1)%sf(i, j, k) = q_prim_vf(k)%sf(i, j, p) - end do - end do - end do - end if - end if - - end subroutine s_pack_boundary_condition_buffers - end module m_boundary_conditions diff --git a/src/pre_process/m_data_output.fpp b/src/pre_process/m_data_output.fpp index 86cc884eb0..b703baf50f 100644 --- a/src/pre_process/m_data_output.fpp +++ b/src/pre_process/m_data_output.fpp @@ -159,7 +159,7 @@ contains end if if (bc_io) then - call s_write_serial_boundary_condition_files(q_prim_vf, bc_type, t_step_dir) + call s_write_serial_boundary_condition_files(q_prim_vf, bc_type, t_step_dir, old_grid) end if ! x-coordinate direction diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp index 3950fdb42d..b9d60e2ac1 100644 --- a/src/pre_process/m_mpi_proxy.fpp +++ b/src/pre_process/m_mpi_proxy.fpp @@ -23,20 +23,8 @@ module m_mpi_proxy implicit none - integer, private :: err_code, ierr, v_size !< - !! Generic flags used to identify and report MPI errors - - real(wp), private, allocatable, dimension(:), target :: q_prims_buff_send !< - !! This variable is utilized to pack and send the buffer of the cell-average - !! primitive variables, for a single computational domain boundary at the - !! time, to the relevant neighboring processor. - - real(wp), private, allocatable, dimension(:), target :: q_prims_buff_recv !< - !! q_prims_buff_recv is utilized to receive and unpack the buffer of the cell- - !! average primitive variables, for a single computational domain boundary - !! at the time, from the relevant neighboring processor. - - ! integer :: halo_size + integer, private :: ierr !< + !! Generic flag used to identify and report MPI errors contains !> Since only processor with rank 0 is in charge of reading @@ -159,458 +147,5 @@ contains end subroutine s_mpi_bcast_user_inputs - !> Description: This subroutine takes care of efficiently distributing - !! the computational domain among the available processors - !! 
as well as recomputing some of the global parameters so - !! that they reflect the configuration of sub-domain that is - !! overseen by the local processor. - impure subroutine s_mpi_decompose_computational_domain - -#ifdef MFC_MPI - - ! # of processors in the x-, y- and z-coordinate directions - integer :: num_procs_x, num_procs_y, num_procs_z - - ! Temporary # of processors in x-, y- and z-coordinate directions - ! used during the processor factorization optimization procedure - real(wp) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z - - ! Processor factorization (fct) minimization parameter - real(wp) :: fct_min - - ! Cartesian processor topology communicator - integer :: MPI_COMM_CART - - ! Number of remaining cells for a particular coordinate direction - ! after the bulk has evenly been distributed among the available - ! processors for that coordinate direction - integer :: rem_cells - - ! Generic loop iterators - integer :: i, j - - if (num_procs == 1 .and. parallel_io) then - do i = 1, num_dims - start_idx(i) = 0 - end do - return - end if - - ! Performing the computational domain decomposition. The procedure - ! is optimized by ensuring that each processor contains a close to - ! equivalent piece of the computational domain. Note that explicit - ! type-casting is omitted here for code legibility purposes. - - ! Generating 3D Cartesian Processor Topology - - if (n > 0) then - - if (p > 0) then - - if (cyl_coord .and. p > 0) then - ! Implement pencil processor blocking if using cylindrical coordinates so - ! that all cells in azimuthal direction are stored on a single processor. - ! This is necessary for efficient application of Fourier filter near axis. - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - num_procs_z = 1 - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - else - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = 1 - num_procs_z = num_procs - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + 10._wp*abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - do j = 1, (num_procs/i) - - if (mod(num_procs/i, j) == 0 & - .and. 
& - (n + 1)/j >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = j - tmp_num_procs_z = num_procs/(i*j) - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) & - .and. & - (p + 1)/tmp_num_procs_z & - >= & - num_stcls_min*weno_order) & - then - - num_procs_x = i - num_procs_y = j - num_procs_z = num_procs/(i*j) - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - ierr = 0 - - end if - - end if - - end do - - end if - - end do - - end if - - ! Checking whether the decomposition of the computational - ! domain was successful - if (proc_rank == 0 .and. ierr == -1) then - print '(A)', 'Unable to decompose computational '// & - 'domain for selected number of '// & - 'processors. Exiting.' - call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) - end if - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, & - num_procs_y, num_procs_z/), & - (/.true., .true., .true./), & - .false., MPI_COMM_CART, ierr) - - ! Finding corresponding Cartesian coordinates of the local - ! processor rank in newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, & - proc_coords, ierr) - - ! END: Generating 3D Cartesian Processor Topology - - ! Sub-domain Global Parameters in z-direction - - ! Number of remaining cells after majority is distributed - rem_cells = mod(p + 1, num_procs_z) - - ! Preliminary uniform cell-width spacing - if (old_grid .neqv. .true.) then - dz = (z_domain%end - z_domain%beg)/real(p + 1, wp) - end if - - ! Optimal number of cells per processor - p = (p + 1)/num_procs_z - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(3) == i - 1) then - p = p + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(3) > 0 .or. (bc_z%beg == BC_PERIODIC .and. num_procs_z > 1)) then - proc_coords(3) = proc_coords(3) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%beg, ierr) - proc_coords(3) = proc_coords(3) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(3) < num_procs_z - 1 .or. (bc_z%end == BC_PERIODIC .and. num_procs_z > 1)) then - proc_coords(3) = proc_coords(3) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%end, ierr) - proc_coords(3) = proc_coords(3) - 1 - end if - - ! Beginning and end sub-domain boundary locations - if (parallel_io .neqv. .true.) then - if (old_grid .neqv. .true.) then - if (proc_coords(3) < rem_cells) then - z_domain%beg = z_domain%beg + dz*real((p + 1)* & - proc_coords(3)) - z_domain%end = z_domain%end - dz*real((p + 1)* & - (num_procs_z - proc_coords(3) - 1) & - - (num_procs_z - rem_cells)) - else - z_domain%beg = z_domain%beg + dz*real((p + 1)* & - proc_coords(3) + rem_cells) - z_domain%end = z_domain%end - dz*real((p + 1)* & - (num_procs_z - proc_coords(3) - 1)) - end if - end if - else - if (proc_coords(3) < rem_cells) then - start_idx(3) = (p + 1)*proc_coords(3) - else - start_idx(3) = (p + 1)*proc_coords(3) + rem_cells - end if - end if - - ! Generating 2D Cartesian Processor Topology - - else - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - ierr = -1 - - ! 
Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - ! Checking whether the decomposition of the computational - ! domain was successful - if (proc_rank == 0 .and. ierr == -1) then - print '(A)', 'Unable to decompose computational '// & - 'domain for selected number of '// & - 'processors. Exiting.' - call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) - end if - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, & - num_procs_y/), (/.true., & - .true./), .false., MPI_COMM_CART, & - ierr) - ! Finding corresponding Cartesian coordinates of the local - ! processor rank in newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, & - proc_coords, ierr) - - end if - - ! END: Generating 2D Cartesian Processor Topology - - ! Sub-domain Global Parameters in y-direction - - ! Number of remaining cells after majority has been distributed - rem_cells = mod(n + 1, num_procs_y) - - ! Preliminary uniform cell-width spacing - if (old_grid .neqv. .true.) then - dy = (y_domain%end - y_domain%beg)/real(n + 1, wp) - end if - - ! Optimal number of cells per processor - n = (n + 1)/num_procs_y - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(2) == i - 1) then - n = n + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(2) > 0 .or. (bc_y%beg == BC_PERIODIC .and. num_procs_y > 1)) then - proc_coords(2) = proc_coords(2) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%beg, ierr) - proc_coords(2) = proc_coords(2) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(2) < num_procs_y - 1 .or. (bc_y%end == BC_PERIODIC .and. num_procs_y > 1)) then - proc_coords(2) = proc_coords(2) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%end, ierr) - proc_coords(2) = proc_coords(2) - 1 - end if - - ! Beginning and end sub-domain boundary locations - if (parallel_io .neqv. .true.) then - if (old_grid .neqv. .true.) then - if (proc_coords(2) < rem_cells) then - y_domain%beg = y_domain%beg + dy*real((n + 1)* & - proc_coords(2)) - y_domain%end = y_domain%end - dy*real((n + 1)* & - (num_procs_y - proc_coords(2) - 1) & - - (num_procs_y - rem_cells)) - else - y_domain%beg = y_domain%beg + dy*real((n + 1)* & - proc_coords(2) + rem_cells) - y_domain%end = y_domain%end - dy*real((n + 1)* & - (num_procs_y - proc_coords(2) - 1)) - end if - end if - else - if (proc_coords(2) < rem_cells) then - start_idx(2) = (n + 1)*proc_coords(2) - else - start_idx(2) = (n + 1)*proc_coords(2) + rem_cells - end if - end if - - ! Generating 1D Cartesian Processor Topology - - else - - ! Number of processors in the coordinate direction is equal to - ! the total number of processors available - num_procs_x = num_procs - - ! 
Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), & - (/.true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding the corresponding Cartesian coordinates of the local - ! processor rank in the newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, & - proc_coords, ierr) - - end if - - ! Sub-domain Global Parameters in x-direction - - ! Number of remaining cells after majority has been distributed - rem_cells = mod(m + 1, num_procs_x) - - ! Preliminary uniform cell-width spacing - if (old_grid .neqv. .true.) then - dx = (x_domain%end - x_domain%beg)/real(m + 1, wp) - end if - - ! Optimal number of cells per processor - m = (m + 1)/num_procs_x - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(1) == i - 1) then - m = m + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(1) > 0 .or. (bc_x%beg == BC_PERIODIC .and. num_procs_x > 1)) then - proc_coords(1) = proc_coords(1) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) - proc_coords(1) = proc_coords(1) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(1) < num_procs_x - 1 .or. (bc_x%end == BC_PERIODIC .and. num_procs_x > 1)) then - proc_coords(1) = proc_coords(1) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) - proc_coords(1) = proc_coords(1) - 1 - end if - - ! Beginning and end sub-domain boundary locations - if (parallel_io .neqv. .true.) then - if (old_grid .neqv. .true.) then - if (proc_coords(1) < rem_cells) then - x_domain%beg = x_domain%beg + dx*real((m + 1)* & - proc_coords(1)) - x_domain%end = x_domain%end - dx*real((m + 1)* & - (num_procs_x - proc_coords(1) - 1) & - - (num_procs_x - rem_cells)) - else - x_domain%beg = x_domain%beg + dx*real((m + 1)* & - proc_coords(1) + rem_cells) - x_domain%end = x_domain%end - dx*real((m + 1)* & - (num_procs_x - proc_coords(1) - 1)) - end if - end if - else - if (proc_coords(1) < rem_cells) then - start_idx(1) = (m + 1)*proc_coords(1) - else - start_idx(1) = (m + 1)*proc_coords(1) + rem_cells - end if - end if - -#endif - - end subroutine s_mpi_decompose_computational_domain end module m_mpi_proxy diff --git a/src/pre_process/m_perturbation.fpp b/src/pre_process/m_perturbation.fpp index f4909e89ee..209c752287 100644 --- a/src/pre_process/m_perturbation.fpp +++ b/src/pre_process/m_perturbation.fpp @@ -32,8 +32,6 @@ contains impure subroutine s_initialize_perturbation_module() - bcxb = bc_x%beg; bcxe = bc_x%end; bcyb = bc_y%beg; bcye = bc_y%end; bczb = bc_z%beg; bcze = bc_z%end - if (mixlayer_perturb) then mixlayer_bc_fd = 2 nbp = n + 2 @@ -624,7 +622,7 @@ contains do q = 1, elliptic_smoothing_iters ! Communication of buffer regions and apply boundary conditions - call s_populate_variables_buffers(q_prim_vf, pb%sf, mv%sf, bc_type) + call s_populate_variables_buffers(bc_type, q_prim_vf, pb%sf, mv%sf) ! Perform smoothing and store in temp array if (n == 0) then diff --git a/src/simulation/m_boundary_conditions.fpp b/src/simulation/m_boundary_conditions.fpp deleted file mode 100644 index d0666e2ca7..0000000000 --- a/src/simulation/m_boundary_conditions.fpp +++ /dev/null @@ -1,125 +0,0 @@ -! @file m_boundary_conditions.fpp -! 
@brief Contains module m_boundary_conditions - -!> @brief This module contains -module m_boundary_conditions - - use m_derived_types - - use m_global_parameters -#ifdef MFC_MPI - use mpi -#endif - use m_delay_file_access - - use m_compile_specific - - use m_boundary_common - -contains - - impure subroutine s_read_serial_boundary_condition_files(step_dirpath, bc_type) - - character(LEN=*), intent(in) :: step_dirpath - - type(integer_field), dimension(1:num_dims, -1:1), intent(inout) :: bc_type - - integer :: dir, loc - logical :: file_exist - character(len=path_len) :: file_path - - ! Read bc_types - file_path = trim(step_dirpath)//'/bc_type.dat' - inquire (FILE=trim(file_path), EXIST=file_exist) - if (.not. file_exist) then - call s_mpi_abort(trim(file_path)//' is missing. Exiting.') - end if - - open (1, FILE=trim(file_path), FORM='unformatted', STATUS='unknown') - do dir = 1, num_dims - do loc = -1, 1, 2 - read (1) bc_type(dir, loc)%sf - !$acc update device(bc_type(dir, loc)%sf) - end do - end do - close (1) - - ! Read bc_buffers - file_path = trim(step_dirpath)//'/bc_buffers.dat' - inquire (FILE=trim(file_path), EXIST=file_exist) - if (.not. file_exist) then - call s_mpi_abort(trim(file_path)//' is missing. Exiting.') - end if - - open (1, FILE=trim(file_path), FORM='unformatted', STATUS='unknown') - do dir = 1, num_dims - do loc = -1, 1, 2 - read (1) bc_buffers(dir, loc)%sf - !$acc update device(bc_buffers(dir, loc)%sf) - end do - end do - close (1) - - end subroutine s_read_serial_boundary_condition_files - - impure subroutine s_read_parallel_boundary_condition_files(bc_type) - - type(integer_field), dimension(1:num_dims, -1:1), intent(inout) :: bc_type - - integer :: dir, loc - character(len=path_len) :: file_loc, file_path - -#ifdef MFC_MPI - integer :: ierr - integer :: file_id - integer :: offset - character(len=7) :: proc_rank_str - logical :: dir_check - - file_loc = trim(case_dir)//'/restart_data/boundary_conditions' - - if (proc_rank == 0) then - call my_inquire(file_loc, dir_check) - if (dir_check .neqv. .true.) then - call s_mpi_abort(trim(file_loc)//' is missing. Exiting.') - end if - end if - - call s_create_mpi_types(bc_type) - - call s_mpi_barrier() - - call DelayFileAccess(proc_rank) - - write (proc_rank_str, '(I7.7)') proc_rank - file_path = trim(file_loc)//'/bc_'//trim(proc_rank_str)//'.dat' - call MPI_File_open(MPI_COMM_SELF, trim(file_path), MPI_MODE_RDONLY, MPI_INFO_NULL, file_id, ierr) - - offset = 0 - - ! Read bc_types - do dir = 1, num_dims - do loc = -1, 1, 2 - call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), MPI_INTEGER, MPI_BC_TYPE_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) - call MPI_File_read_all(file_id, bc_type(dir, loc)%sf, 1, MPI_BC_TYPE_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) - offset = offset + sizeof(bc_type(dir, loc)%sf) - !$acc update device(bc_type(dir, loc)%sf) - end do - end do - - ! 
Read bc_buffers - do dir = 1, num_dims - do loc = -1, 1, 2 - call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_p, MPI_BC_BUFFER_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) - call MPI_File_read_all(file_id, bc_buffers(dir, loc)%sf, 1, MPI_BC_BUFFER_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) - offset = offset + sizeof(bc_buffers(dir, loc)%sf) - !$acc update device(bc_buffers(dir, loc)%sf) - end do - end do - - call MPI_File_close(file_id, ierr) -#endif - - end subroutine s_read_parallel_boundary_condition_files - -end module m_boundary_conditions diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index d0b94f6b7a..9362f013ad 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -190,8 +190,6 @@ module m_global_parameters integer :: num_bc_patches logical :: bc_io - integer :: BC_RIEMANN_EXTRAPOLATION - integer :: BC_GHOST_EXTRAPOLATION !> @name Boundary conditions (BC) in the x-, y- and z-directions, respectively !> @{ type(int_bounds_info) :: bc_x, bc_y, bc_z @@ -584,8 +582,6 @@ contains num_bc_patches = 0 bc_io = .false. - BC_RIEMANN_EXTRAPOLATION = -4 - BC_GHOST_EXTRAPOLATION = -3 bc_x%beg = dflt_int; bc_x%end = dflt_int bc_y%beg = dflt_int; bc_y%end = dflt_int diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index c7c96890aa..0314aad864 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -85,7 +85,7 @@ contains !$acc update device(levelset_norm%sf) ! Get neighboring IB variables from other processors - call s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers) + call s_populate_ib_buffers() !$acc update host(ib_markers%sf) @@ -108,6 +108,18 @@ contains end subroutine s_ibm_setup + subroutine s_populate_ib_buffers() + + #:for DIRC, DIRI in [('x', 1), ('y', 2), ('z', 3)] + #:for LOCC, LOCI in [('beg', -1), ('end', 1)] + if (bc_${DIRC}$%${LOCC}$ >= 0) then + call s_mpi_sendrecv_ib_buffers(ib_markers, ${DIRI}$, ${LOCI}$) + end if + #:endfor + #:endfor + + end subroutine s_populate_ib_buffers + !> Subroutine that updates the conservative variables at the ghost points !! @param q_cons_vf Conservative Variables !! @param q_prim_vf Primitive variables diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index f931227f4d..ca4a4e3e8e 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -32,24 +32,52 @@ module m_mpi_proxy implicit none - integer, private, allocatable, dimension(:), target :: ib_buff_send !< + integer, private, allocatable, dimension(:) :: ib_buff_send !< !! This variable is utilized to pack and send the buffer of the immersed !! boundary markers, for a single computational domain boundary at the !! time, to the relevant neighboring processor. - integer, private, allocatable, dimension(:), target :: ib_buff_recv !< + integer, private, allocatable, dimension(:) :: ib_buff_recv !< !! q_cons_buff_recv is utilized to receive and unpack the buffer of the !! immersed boundary markers, for a single computational domain boundary !! at the time, from the relevant neighboring processor. 
!> @name Generic flags used to identify and report MPI errors !> @{ - integer, private :: err_code, ierr, v_size + integer, private :: ierr !> @} - !$acc declare create(v_size) + + integer :: i_halo_size + !$acc declare create(i_halo_size) contains + subroutine s_initialize_mpi_proxy_module() + +#ifdef MFC_MPI + if (ib) then + if (n > 0) then + if (p > 0) then + i_halo_size = -1 + gp_layers* & + & (m + 2*gp_layers + 1)* & + & (n + 2*gp_layers + 1)* & + & (p + 2*gp_layers + 1)/ & + & (min(m, n, p) + 2*gp_layers + 1) + else + i_halo_size = -1 + gp_layers* & + & (max(m, n) + 2*gp_layers + 1) + end if + else + i_halo_size = -1 + gp_layers + end if + + !$acc update device(i_halo_size) + @:ALLOCATE(ib_buff_send(0:i_halo_size), ib_buff_recv(0:i_halo_size)) + end if +#endif + + end subroutine s_initialize_mpi_proxy_module + !> Since only the processor with rank 0 reads and verifies !! the consistency of user inputs, these are initially not !! available to the other processors. Then, the purpose of @@ -207,1405 +235,155 @@ contains end subroutine s_mpi_bcast_user_inputs - !> The purpose of this procedure is to optimally decompose - !! the computational domain among the available processors. - !! This is performed by attempting to award each processor, - !! in each of the coordinate directions, approximately the - !! same number of cells, and then recomputing the affected - !! global parameters. - impure subroutine s_mpi_decompose_computational_domain - -#ifdef MFC_MPI - - integer :: num_procs_x, num_procs_y, num_procs_z !< - !! Optimal number of processors in the x-, y- and z-directions - - real(wp) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z !< - !! Non-optimal number of processors in the x-, y- and z-directions - - real(wp) :: fct_min !< - !! Processor factorization (fct) minimization parameter - - integer :: MPI_COMM_CART !< - !! Cartesian processor topology communicator - - integer :: rem_cells !< - !! Remaining number of cells, in a particular coordinate direction, - !! after the majority is divided up among the available processors - - integer :: i, j !< Generic loop iterators - - if (num_procs == 1 .and. parallel_io) then - do i = 1, num_dims - start_idx(i) = 0 - end do - return - end if - - ! 3D Cartesian Processor Topology - if (n > 0) then - - if (p > 0) then - - if (cyl_coord .and. p > 0) then - ! Implement pencil processor blocking if using cylindrical coordinates so - ! that all cells in azimuthal direction are stored on a single processor. - ! This is necessary for efficient application of Fourier filter near axis. - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - num_procs_z = 1 - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - else - - ! 
Initial estimate of optimal processor topology - num_procs_x = 1 - num_procs_y = 1 - num_procs_z = num_procs - ierr = -1 - - ! Benchmarking the quality of this initial guess - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + 10._wp*abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - - ! Optimization of the initial processor topology - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - do j = 1, num_procs/i - - if (mod(num_procs/i, j) == 0 & - .and. & - (n + 1)/j >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = j - tmp_num_procs_z = num_procs/(i*j) - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) & - .and. & - (p + 1)/tmp_num_procs_z & - >= & - num_stcls_min*weno_order) & - then - - num_procs_x = i - num_procs_y = j - num_procs_z = num_procs/(i*j) - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - ierr = 0 - - end if - - end if - - end do - - end if - - end do - - end if - - ! Verifying that a valid decomposition of the computational - ! domain has been established. If not, the simulation exits. - if (proc_rank == 0 .and. ierr == -1) then - call s_mpi_abort('Unsupported combination of values '// & - 'of num_procs, m, n, p and '// & - 'weno_order. Exiting.') - end if - - ! Creating new communicator using the Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, & - num_procs_y, num_procs_z/), & - (/.true., .true., .true./), & - .false., MPI_COMM_CART, ierr) - - ! Finding the Cartesian coordinates of the local process - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, & - proc_coords, ierr) - ! END: 3D Cartesian Processor Topology - - ! Global Parameters for z-direction - - ! Number of remaining cells - rem_cells = mod(p + 1, num_procs_z) - - ! Optimal number of cells per processor - p = (p + 1)/num_procs_z - 1 - - ! Distributing the remaining cells - do i = 1, rem_cells - if (proc_coords(3) == i - 1) then - p = p + 1; exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(3) > 0 .or. (bc_z%beg == BC_PERIODIC .and. num_procs_z > 1)) then - proc_coords(3) = proc_coords(3) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%beg, ierr) - proc_coords(3) = proc_coords(3) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(3) < num_procs_z - 1 .or. (bc_z%end == BC_PERIODIC .and. num_procs_z > 1)) then - proc_coords(3) = proc_coords(3) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%end, ierr) - proc_coords(3) = proc_coords(3) - 1 - end if - - if (parallel_io) then - if (proc_coords(3) < rem_cells) then - start_idx(3) = (p + 1)*proc_coords(3) - else - start_idx(3) = (p + 1)*proc_coords(3) + rem_cells - end if - end if - - ! 2D Cartesian Processor Topology - else - - ! Initial estimate of optimal processor topology - num_procs_x = 1 - num_procs_y = num_procs - ierr = -1 - - ! Benchmarking the quality of this initial guess - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Optimization of the initial processor topology - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. 
& - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - ! Verifying that a valid decomposition of the computational - ! domain has been established. If not, the simulation exits. - if (proc_rank == 0 .and. ierr == -1) then - call s_mpi_abort('Unsupported combination of values '// & - 'of num_procs, m, n and '// & - 'weno_order. Exiting.') - end if - - ! Creating new communicator using the Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, & - num_procs_y/), (/.true., & - .true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding the Cartesian coordinates of the local process - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, & - proc_coords, ierr) - - end if - ! END: 2D Cartesian Processor Topology - - ! Global Parameters for y-direction - - ! Number of remaining cells - rem_cells = mod(n + 1, num_procs_y) - - ! Optimal number of cells per processor - n = (n + 1)/num_procs_y - 1 - - ! Distributing the remaining cells - do i = 1, rem_cells - if (proc_coords(2) == i - 1) then - n = n + 1; exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(2) > 0 .or. (bc_y%beg == BC_PERIODIC .and. num_procs_y > 1)) then - proc_coords(2) = proc_coords(2) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%beg, ierr) - proc_coords(2) = proc_coords(2) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(2) < num_procs_y - 1 .or. (bc_y%end == BC_PERIODIC .and. num_procs_y > 1)) then - proc_coords(2) = proc_coords(2) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%end, ierr) - proc_coords(2) = proc_coords(2) - 1 - end if - - if (parallel_io) then - if (proc_coords(2) < rem_cells) then - start_idx(2) = (n + 1)*proc_coords(2) - else - start_idx(2) = (n + 1)*proc_coords(2) + rem_cells - end if - end if - - ! 1D Cartesian Processor Topology - else - - ! Optimal processor topology - num_procs_x = num_procs - - ! Creating new communicator using the Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), & - (/.true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding the Cartesian coordinates of the local process - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, & - proc_coords, ierr) - - end if - - ! Global Parameters for x-direction - - ! Number of remaining cells - rem_cells = mod(m + 1, num_procs_x) - - ! Optimal number of cells per processor - m = (m + 1)/num_procs_x - 1 - - ! Distributing the remaining cells - do i = 1, rem_cells - if (proc_coords(1) == i - 1) then - m = m + 1; exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(1) > 0 .or. (bc_x%beg == BC_PERIODIC .and. num_procs_x > 1)) then - proc_coords(1) = proc_coords(1) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) - proc_coords(1) = proc_coords(1) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(1) < num_procs_x - 1 .or. (bc_x%end == BC_PERIODIC .and. 
num_procs_x > 1)) then - proc_coords(1) = proc_coords(1) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) - proc_coords(1) = proc_coords(1) - 1 - end if - - if (parallel_io) then - if (proc_coords(1) < rem_cells) then - start_idx(1) = (m + 1)*proc_coords(1) - else - start_idx(1) = (m + 1)*proc_coords(1) + rem_cells - end if - end if - -#endif - - end subroutine s_mpi_decompose_computational_domain - - !> The goal of this procedure is to populate the buffers of - !! the grid variables by communicating with the neighboring - !! processors. Note that only the buffers of the cell-width - !! distributions are handled in such a way. This is because - !! the buffers of cell-boundary locations may be calculated - !! directly from those of the cell-width distributions. - !! @param mpi_dir MPI communication coordinate direction - !! @param pbc_loc Processor boundary condition (PBC) location - impure subroutine s_mpi_sendrecv_grid_variables_buffers(mpi_dir, pbc_loc) - - integer, intent(in) :: mpi_dir - integer, intent(in) :: pbc_loc - -#ifdef MFC_MPI - - ! MPI Communication in x-direction - if (mpi_dir == 1) then - - if (pbc_loc == -1) then ! PBC at the beginning - - if (bc_x%end >= 0) then ! PBC at the beginning and end - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - dx(m - buff_size + 1), buff_size, & - mpi_p, bc_x%end, 0, & - dx(-buff_size), buff_size, & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the beginning only - - ! Send/receive buffer to/from bc_x%beg/bc_x%beg - call MPI_SENDRECV( & - dx(0), buff_size, & - mpi_p, bc_x%beg, 1, & - dx(-buff_size), buff_size, & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - else ! PBC at the end - - if (bc_x%beg >= 0) then ! PBC at the end and beginning - - ! Send/receive buffer to/from bc_x%beg/bc_x%end - call MPI_SENDRECV( & - dx(0), buff_size, & - mpi_p, bc_x%beg, 1, & - dx(m + 1), buff_size, & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the end only - - ! Send/receive buffer to/from bc_x%end/bc_x%end - call MPI_SENDRECV( & - dx(m - buff_size + 1), buff_size, & - mpi_p, bc_x%end, 0, & - dx(m + 1), buff_size, & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - end if - ! END: MPI Communication in x-direction - - ! MPI Communication in y-direction - elseif (mpi_dir == 2) then - - if (pbc_loc == -1) then ! PBC at the beginning - - if (bc_y%end >= 0) then ! PBC at the beginning and end - - ! Send/receive buffer to/from bc_y%end/bc_y%beg - call MPI_SENDRECV( & - dy(n - buff_size + 1), buff_size, & - mpi_p, bc_y%end, 0, & - dy(-buff_size), buff_size, & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the beginning only - - ! Send/receive buffer to/from bc_y%beg/bc_y%beg - call MPI_SENDRECV( & - dy(0), buff_size, & - mpi_p, bc_y%beg, 1, & - dy(-buff_size), buff_size, & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - else ! PBC at the end - - if (bc_y%beg >= 0) then ! PBC at the end and beginning - - ! Send/receive buffer to/from bc_y%beg/bc_y%end - call MPI_SENDRECV( & - dy(0), buff_size, & - mpi_p, bc_y%beg, 1, & - dy(n + 1), buff_size, & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the end only - - ! 
Send/receive buffer to/from bc_y%end/bc_y%end - call MPI_SENDRECV( & - dy(n - buff_size + 1), buff_size, & - mpi_p, bc_y%end, 0, & - dy(n + 1), buff_size, & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - end if - ! END: MPI Communication in y-direction - - ! MPI Communication in z-direction - else - - if (pbc_loc == -1) then ! PBC at the beginning - - if (bc_z%end >= 0) then ! PBC at the beginning and end - - ! Send/receive buffer to/from bc_z%end/bc_z%beg - call MPI_SENDRECV( & - dz(p - buff_size + 1), buff_size, & - mpi_p, bc_z%end, 0, & - dz(-buff_size), buff_size, & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the beginning only - - ! Send/receive buffer to/from bc_z%beg/bc_z%beg - call MPI_SENDRECV( & - dz(0), buff_size, & - mpi_p, bc_z%beg, 1, & - dz(-buff_size), buff_size, & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - else ! PBC at the end - - if (bc_z%beg >= 0) then ! PBC at the end and beginning - - ! Send/receive buffer to/from bc_z%beg/bc_z%end - call MPI_SENDRECV( & - dz(0), buff_size, & - mpi_p, bc_z%beg, 1, & - dz(p + 1), buff_size, & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the end only - - ! Send/receive buffer to/from bc_z%end/bc_z%end - call MPI_SENDRECV( & - dz(p - buff_size + 1), buff_size, & - mpi_p, bc_z%end, 0, & - dz(p + 1), buff_size, & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - end if - - end if - ! END: MPI Communication in z-direction - -#endif - - end subroutine s_mpi_sendrecv_grid_variables_buffers - - !> The goal of this procedure is to populate the buffers of - !! the cell-average conservative variables by communicating - !! with the neighboring processors. - impure subroutine s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers) + subroutine s_mpi_sendrecv_ib_buffers(ib_markers, mpi_dir, pbc_loc) type(integer_field), intent(inout) :: ib_markers - integer, intent(in) :: gp_layers - - integer :: j, k, l, r !< Generic loop iterators - integer, pointer, dimension(:) :: p_i_send, p_i_recv - -#ifdef MFC_MPI - - if (n > 0) then - if (p > 0) then - @:ALLOCATE(ib_buff_send(0:-1 + gp_layers * & - & (m + 2*gp_layers + 1)* & - & (n + 2*gp_layers + 1)* & - & (p + 2*gp_layers + 1)/ & - & (min(m, n, p) + 2*gp_layers + 1))) - else - @:ALLOCATE(ib_buff_send(0:-1 + gp_layers* & - & (max(m, n) + 2*gp_layers + 1))) - end if - else - @:ALLOCATE(ib_buff_send(0:-1 + gp_layers)) - end if - @:ALLOCATE(ib_buff_recv(0:ubound(ib_buff_send, 1))) - - !nCalls_time = nCalls_time + 1 - - ! MPI Communication in x-direction - if (bc_x%beg >= 0) then ! PBC at the beginning - - if (bc_x%end >= 0) then ! PBC at the beginning and end - - ! Packing buffer to be sent to bc_x%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = m - gp_layers + 1, m - r = ((j - m - 1) + gp_layers*((k + 1) + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv + integer, intent(in) :: mpi_dir, pbc_loc - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) + integer :: i, j, k, l, r, q !< Generic loop iterators - ! 
Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 0, & - p_i_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + integer :: buffer_counts(1:3), buffer_count - !$acc end host_data - !$acc end data - !$acc wait - else -#endif + type(int_bounds_info) :: boundary_conditions(1:3) + integer :: beg_end(1:2), grid_dims(1:3) + integer :: dst_proc, src_proc, recv_tag, send_tag - !$acc update host(ib_buff_send, ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 0, & - ib_buff_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + logical :: beg_end_geq_0, qbmm_comm -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the beginning only - - ! Packing buffer to be sent to bc_x%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, gp_layers - 1 - r = (j + gp_layers*(k + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do + integer :: pack_offset, unpack_offset - !call MPI_Barrier(MPI_COMM_WORLD, ierr) + integer, pointer :: p_i_send, p_i_recv -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv +#ifdef MFC_MPI - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) + call nvtxStartRange("IB-MARKER-COMM-PACKBUF") - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 1, & - p_i_recv(0), & + buffer_counts = (/ & gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + gp_layers*(m + 2*gp_layers + 1)*(p + 1), & + gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1) & + /) - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - !$acc update host(ib_buff_send) + buffer_count = buffer_counts(mpi_dir) + boundary_conditions = (/bc_x, bc_y, bc_z/) + beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) + beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 1, & - ib_buff_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + ! Implements: + ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc + ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] + ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] + ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] + ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] -#if defined(MFC_OpenACC) - end if -#endif - - end if + send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) + recv_tag = f_logical_to_int(pbc_loc == 1) -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) - end if -#endif + dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) + src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) - ! 
Unpacking buffer received from bc_x%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -gp_layers, -1 - r = (j + gp_layers*((k + 1) + (n + 1)*l)) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do + grid_dims = (/m, n, p/) + pack_offset = 0 + if (f_xor(pbc_loc == 1, beg_end_geq_0)) then + pack_offset = grid_dims(mpi_dir) - gp_layers + 1 end if - if (bc_x%end >= 0) then ! PBC at the end - - if (bc_x%beg >= 0) then ! PBC at the end and beginning - - !$acc parallel loop collapse(3) gang vector default(present) private(r) - ! Packing buffer to be sent to bc_x%beg - do l = 0, p - do k = 0, n - do j = 0, gp_layers - 1 - r = (j + gp_layers*(k + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 1, & - p_i_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 1, & - ib_buff_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the end only - - ! Packing buffer to be sent to bc_x%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = m - gp_layers + 1, m - r = ((j - m - 1) + gp_layers*((k + 1) + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 0, & - p_i_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 0, & - ib_buff_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - end if - - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) - end if - - ! Unpacking buffer received from bc_x%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = m + 1, m + gp_layers - r = ((j - m - 1) + gp_layers*(k + (n + 1)*l)) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do - + unpack_offset = 0 + if (pbc_loc == 1) then + unpack_offset = grid_dims(mpi_dir) + gp_layers + 1 end if - ! END: MPI Communication in x-direction - - ! MPI Communication in y-direction - - if (bc_y%beg >= 0) then ! 
PBC at the beginning - - if (bc_y%end >= 0) then ! PBC at the beginning and end - - ! Packing buffer to be sent to bc_y%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = n - gp_layers + 1, n - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k - n + gp_layers - 1) + gp_layers*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 0, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 0, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the beginning only - - ! Packing buffer to be sent to bc_y%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, gp_layers - 1 - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - (k + gp_layers*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) + ! Pack Buffer to Send + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = 0, gp_layers - 1 + r = (j + gp_layers*(k + (n + 1)*l)) + ib_buff_send(r) = ib_markers%sf(j + pack_offset, k, l) + end do end do end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 1, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 1, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - end if - -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) - end if -#endif - - ! 
Unpacking buffer received from bc_y%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = -gp_layers, -1 - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + gp_layers*l)) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do - - end if - - if (bc_y%end >= 0) then ! PBC at the end - - if (bc_y%beg >= 0) then ! PBC at the end and beginning - - ! Packing buffer to be sent to bc_y%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, gp_layers - 1 - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - (k + gp_layers*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) + #:elif mpi_dir == 2 + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, p + do k = 0, gp_layers - 1 + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & + (k + gp_layers*l)) + ib_buff_send(r) = ib_markers%sf(j, k + pack_offset, l) + end do end do end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 1, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 1, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the end only - - ! Packing buffer to be sent to bc_y%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = n - gp_layers + 1, n - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k - n + gp_layers - 1) + gp_layers*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) + #:else + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, gp_layers - 1 + do k = -gp_layers, n + gp_layers + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & + ((k + gp_layers) + (n + 2*gp_layers + 1)*l)) + ib_buff_send(r) = ib_markers%sf(j, k, l + pack_offset) + end do end do end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 0, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! 
Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 0, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - + #:endif end if - -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) - end if -#endif - - ! Unpacking buffer received form bc_y%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = n + 1, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k - n - 1) + gp_layers*l)) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do - - end if - ! END: MPI Communication in y-direction - - ! MPI Communication in z-direction - if (bc_z%beg >= 0) then ! PBC at the beginning - - if (bc_z%end >= 0) then ! PBC at the beginning and end - - ! Packing buffer to be sent to bc_z%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = p - gp_layers + 1, p - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l - p + gp_layers - 1))) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 0, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 0, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the beginning only - - ! Packing buffer to be sent to bc_z%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, gp_layers - 1 - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) + #:endfor + call nvtxEndRange ! Packbuf + + call nvtxStartRange("IB-MARKER-SENDRECV") + call MPI_SENDRECV( & + ib_buff_send, buffer_count, MPI_INTEGER, dst_proc, send_tag, & + ib_buff_recv, buffer_count, MPI_INTEGER, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA + + ! 
Unpack Received Buffer + call nvtxStartRange("IB-MARKER-COMM-UNPACKBUF") + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = -gp_layers, -1 + r = (j + gp_layers*((k + 1) + (n + 1)*l)) + ib_markers%sf(j + unpack_offset, k, l) = ib_buff_recv(r) + end do end do end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 1, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 1, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - end if - -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) - end if -#endif - - ! Unpacking buffer from bc_z%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = -gp_layers, -1 - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l + gp_layers))) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do - - end if - - if (bc_z%end >= 0) then ! PBC at the end - - if (bc_z%beg >= 0) then ! PBC at the end and beginning - - ! Packing buffer to be sent to bc_z%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, gp_layers - 1 - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) + #:elif mpi_dir == 2 + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, p + do k = -gp_layers, -1 + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & + ((k + gp_layers) + gp_layers*l)) + ib_markers%sf(j, k + unpack_offset, l) = ib_buff_recv(r) + end do end do end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 1, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - !$acc update host(ib_buff_send) - - ! 
Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 1, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the end only - - ! Packing buffer to be sent to bc_z%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = p - gp_layers + 1, p - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l - p + gp_layers - 1))) - ib_buff_send(r) = ib_markers%sf(j, k, l) + #:else + ! Unpacking buffer from bc_z%beg + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = -gp_layers, -1 + do k = -gp_layers, n + gp_layers + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & + ((k + gp_layers) + (n + 2*gp_layers + 1)* & + (l + gp_layers))) + ib_markers%sf(j, k, l + unpack_offset) = ib_buff_recv(r) + end do end do end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 0, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 0, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - + #:endif end if - -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) - end if -#endif - - ! Unpacking buffer received from bc_z%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = p + 1, p + gp_layers - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l - p - 1))) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do - - end if - - ! 
END: MPI Communication in z-direction - + #:endfor + call nvtxEndRange #endif end subroutine s_mpi_sendrecv_ib_buffers @@ -1613,9 +391,21 @@ contains impure subroutine s_mpi_send_random_number(phi_rn, num_freq) integer, intent(in) :: num_freq real(wp), intent(inout), dimension(1:num_freq) :: phi_rn + #ifdef MFC_MPI call MPI_BCAST(phi_rn, num_freq, mpi_p, 0, MPI_COMM_WORLD, ierr) #endif + end subroutine s_mpi_send_random_number + subroutine s_finalize_mpi_proxy_module() + +#ifdef MFC_MPI + if (ib) then + @:DEALLOCATE(ib_buff_send, ib_buff_recv) + end if +#endif + + end subroutine s_finalize_mpi_proxy_module + end module m_mpi_proxy diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 44aa3e604f..93bcd55113 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -649,7 +649,7 @@ contains call nvtxEndRange call nvtxStartRange("RHS-COMMUNICATION") - call s_populate_variables_buffers(q_prim_qp%vf, pb, mv, bc_type) + call s_populate_variables_buffers(bc_type, q_prim_qp%vf, pb, mv) call nvtxEndRange call nvtxStartRange("RHS-ELASTIC") diff --git a/src/simulation/m_sim_helpers.f90 b/src/simulation/m_sim_helpers.f90 index a05ba92bf1..5f80d9c0a2 100644 --- a/src/simulation/m_sim_helpers.f90 +++ b/src/simulation/m_sim_helpers.f90 @@ -10,8 +10,7 @@ module m_sim_helpers private; public :: s_compute_enthalpy, & s_compute_stability_from_dt, & - s_compute_dt_from_cfl, & - s_assign_default_bc_type + s_compute_dt_from_cfl contains @@ -268,26 +267,4 @@ pure subroutine s_compute_dt_from_cfl(vel, c, max_dt, rho, Re_l, j, k, l) end subroutine s_compute_dt_from_cfl - pure subroutine s_assign_default_bc_type(bc_type) - - type(integer_field), dimension(1:num_dims, -1:1), intent(inout) :: bc_type - - bc_type(1, -1)%sf(:, :, :) = bc_x%beg - bc_type(1, 1)%sf(:, :, :) = bc_x%end - !$acc update device(bc_type(1,-1)%sf, bc_type(1,1)%sf) - - if (n > 0) then - bc_type(2, -1)%sf(:, :, :) = bc_y%beg - bc_type(2, 1)%sf(:, :, :) = bc_y%end - !$acc update device(bc_type(2,-1)%sf, bc_type(2,1)%sf) - - if (p > 0) then - bc_type(3, -1)%sf(:, :, :) = bc_z%beg - bc_type(3, 1)%sf(:, :, :) = bc_z%end - !$acc update device(bc_type(3,-1)%sf, bc_type(3,1)%sf) - end if - end if - - end subroutine s_assign_default_bc_type - end module m_sim_helpers diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index dbfda5f83b..330cb75712 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -96,7 +96,6 @@ module m_start_up s_read_data_files, & s_read_serial_data_files, & s_read_parallel_data_files, & - s_populate_grid_variables_buffers, & s_initialize_internal_energy_equations, & s_initialize_modules, s_initialize_gpu_vars, & s_initialize_mpi_domain, s_finalize_modules, & @@ -959,219 +958,7 @@ contains end subroutine s_read_parallel_data_files - !> The purpose of this subroutine is to populate the buffers - !! of the grid variables, which are constituted of the cell- - !! boundary locations and cell-width distributions, based on - !! the boundary conditions. - impure subroutine s_populate_grid_variables_buffers - - integer :: i !< Generic loop iterator - - ! Population of Buffers in x-direction - - ! Populating cell-width distribution buffer, at the beginning of the - ! coordinate direction, based on the selected boundary condition. In - ! order, these are the ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. 
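The refactored s_mpi_sendrecv_ib_buffers above collapses the four per-face branches (begin/end face, with or without a processor boundary on the opposite face) into a single packed MPI_SENDRECV whose tags and neighbour ranks are computed from pbc_loc and beg_end_geq_0. The short, self-contained sketch below is not MFC source; its two helpers stand in for the codebase's f_logical_to_int and f_xor, and it simply enumerates the four cases to reproduce the table given in the inline comment, which may help when reviewing the tag/neighbour selection.

program demo_ib_tags
   ! Illustrative sketch (not MFC source): enumerate the four cases handled by the
   ! refactored s_mpi_sendrecv_ib_buffers and show that the xor-based tag and
   ! neighbour selection reproduces the table in the inline comment. The two
   ! helpers below stand in for the codebase's f_logical_to_int and f_xor.
   implicit none
   integer :: pbc_loc, iopp, send_tag, recv_tag, dst_idx, src_idx
   logical :: beg_end_geq_0
   character(len=3), parameter :: face(1:2) = ['beg', 'end']

   do pbc_loc = -1, 1, 2
      do iopp = 0, 1
         ! beg_end_geq_0: is the face opposite pbc_loc a processor boundary?
         beg_end_geq_0 = (iopp == 1)

         send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1))
         recv_tag = f_logical_to_int(pbc_loc == 1)
         dst_idx = 1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))
         src_idx = 1 + f_logical_to_int(pbc_loc == 1)

         print '(a,i3,a,l2,a,i2,i2,a,a,1x,a)', ' pbc_loc=', pbc_loc, &
            ' opposite_is_proc=', beg_end_geq_0, ' tags=[', send_tag, recv_tag, &
            ' ] dst/src= ', face(dst_idx), face(src_idx)
      end do
   end do

contains

   pure integer function f_logical_to_int(cond) result(i)
      logical, intent(in) :: cond
      i = merge(1, 0, cond)
   end function f_logical_to_int

   pure logical function f_xor(a, b) result(r)
      logical, intent(in) :: a, b
      r = (a .neqv. b)
   end function f_xor

end program demo_ib_tags

Here beg_end_geq_0 corresponds to the face opposite pbc_loc being a processor boundary, which is what the index expression max(pbc_loc, 0) - pbc_loc + 1 selects from beg_end; the four printed rows match the send/recv pairs of the deleted per-face MPI_SENDRECV calls.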
- if (bc_x%beg <= BC_GHOST_EXTRAP) then - do i = 1, buff_size - dx(-i) = dx(0) - end do - elseif (bc_x%beg == BC_REFLECTIVE) then - do i = 1, buff_size - dx(-i) = dx(i - 1) - end do - elseif (bc_x%beg == BC_PERIODIC) then - do i = 1, buff_size - dx(-i) = dx(m - (i - 1)) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(1, -1) - end if - - ! Computing the cell-boundary locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - x_cb(-1 - i) = x_cb(-i) - dx(-i) - end do - ! Computing the cell-center locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - x_cc(-i) = x_cc(1 - i) - (dx(1 - i) + dx(-i))/2._wp - end do - - ! Populating the cell-width distribution buffer, at the end of the - ! coordinate direction, based on desired boundary condition. These - ! include, in order, ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. - if (bc_x%end <= BC_GHOST_EXTRAP) then - do i = 1, buff_size - dx(m + i) = dx(m) - end do - elseif (bc_x%end == BC_REFLECTIVE) then - do i = 1, buff_size - dx(m + i) = dx(m - (i - 1)) - end do - elseif (bc_x%end == BC_PERIODIC) then - do i = 1, buff_size - dx(m + i) = dx(i - 1) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(1, 1) - end if - - ! Populating the cell-boundary locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - x_cb(m + i) = x_cb(m + (i - 1)) + dx(m + i) - end do - ! Populating the cell-center locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - x_cc(m + i) = x_cc(m + (i - 1)) + (dx(m + (i - 1)) + dx(m + i))/2._wp - end do - - ! END: Population of Buffers in x-direction - - ! Population of Buffers in y-direction - - ! Populating cell-width distribution buffer, at the beginning of the - ! coordinate direction, based on the selected boundary condition. In - ! order, these are the ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. - if (n == 0) then - return - elseif (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_AXIS) then - do i = 1, buff_size - dy(-i) = dy(0) - end do - elseif (bc_y%beg == BC_REFLECTIVE .or. bc_y%beg == BC_AXIS) then - do i = 1, buff_size - dy(-i) = dy(i - 1) - end do - elseif (bc_y%beg == BC_PERIODIC) then - do i = 1, buff_size - dy(-i) = dy(n - (i - 1)) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(2, -1) - end if - - ! Computing the cell-boundary locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - y_cb(-1 - i) = y_cb(-i) - dy(-i) - end do - ! Computing the cell-center locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - y_cc(-i) = y_cc(1 - i) - (dy(1 - i) + dy(-i))/2._wp - end do - - ! Populating the cell-width distribution buffer, at the end of the - ! coordinate direction, based on desired boundary condition. These - ! include, in order, ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. 
- if (bc_y%end <= BC_GHOST_EXTRAP) then - do i = 1, buff_size - dy(n + i) = dy(n) - end do - elseif (bc_y%end == BC_REFLECTIVE) then - do i = 1, buff_size - dy(n + i) = dy(n - (i - 1)) - end do - elseif (bc_y%end == BC_PERIODIC) then - do i = 1, buff_size - dy(n + i) = dy(i - 1) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(2, 1) - end if - - ! Populating the cell-boundary locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - y_cb(n + i) = y_cb(n + (i - 1)) + dy(n + i) - end do - ! Populating the cell-center locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - y_cc(n + i) = y_cc(n + (i - 1)) + (dy(n + (i - 1)) + dy(n + i))/2._wp - end do - - ! END: Population of Buffers in y-direction - - ! Population of Buffers in z-direction - - ! Populating cell-width distribution buffer, at the beginning of the - ! coordinate direction, based on the selected boundary condition. In - ! order, these are the ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. - if (p == 0) then - return - elseif (bc_z%beg <= BC_GHOST_EXTRAP) then - do i = 1, buff_size - dz(-i) = dz(0) - end do - elseif (bc_z%beg == BC_REFLECTIVE) then - do i = 1, buff_size - dz(-i) = dz(i - 1) - end do - elseif (bc_z%beg == BC_PERIODIC) then - do i = 1, buff_size - dz(-i) = dz(p - (i - 1)) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(3, -1) - end if - - ! Computing the cell-boundary locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - z_cb(-1 - i) = z_cb(-i) - dz(-i) - end do - ! Computing the cell-center locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - z_cc(-i) = z_cc(1 - i) - (dz(1 - i) + dz(-i))/2._wp - end do - - ! Populating the cell-width distribution buffer, at the end of the - ! coordinate direction, based on desired boundary condition. These - ! include, in order, ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. - if (bc_z%end <= BC_GHOST_EXTRAP) then - do i = 1, buff_size - dz(p + i) = dz(p) - end do - elseif (bc_z%end == BC_REFLECTIVE) then - do i = 1, buff_size - dz(p + i) = dz(p - (i - 1)) - end do - elseif (bc_z%end == BC_PERIODIC) then - do i = 1, buff_size - dz(p + i) = dz(i - 1) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(3, 1) - end if - - ! Populating the cell-boundary locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - z_cb(p + i) = z_cb(p + (i - 1)) + dz(p + i) - end do - ! Populating the cell-center locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - z_cc(p + i) = z_cc(p + (i - 1)) + (dz(p + (i - 1)) + dz(p + i))/2._wp - end do - - ! END: Population of Buffers in z-direction - - end subroutine s_populate_grid_variables_buffers - - !> The purpose of this procedure is to initialize the + !> The purpose of this procedure is to initialize the !! values of the internal-energy equations of each phase !! from the mass of each phase, the mixture momentum and !! mixture-total-energy equations. 
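The routine removed from m_start_up.fpp above fills the grid-variable buffers in two steps per direction and face: it first copies cell widths into the buffer according to the selected boundary condition (ghost-cell extrapolation, reflective, periodic, or an MPI exchange for processor boundaries), then derives the cell-boundary and cell-centre locations from those widths. A minimal single-direction sketch of that pattern follows; it is illustrative only (local names m_loc, buff and a string-valued bc_beg instead of MFC's module globals and negative BC_* codes), and it omits the s_mpi_sendrecv_grid_variables_buffers fallback used for processor boundaries.

program demo_grid_buffers
   ! Illustrative sketch (not MFC source): populate the cell-width buffer of one
   ! coordinate direction for the ghost-cell-extrapolation, reflective (symmetry)
   ! and periodic cases, then derive the cell-boundary and cell-centre buffers.
   implicit none
   integer, parameter :: wp = selected_real_kind(15)
   integer, parameter :: m_loc = 7, buff = 3          ! hypothetical local sizes
   real(wp) :: dx(-buff:m_loc + buff)                 ! cell widths incl. buffers
   real(wp) :: x_cb(-1 - buff:m_loc + buff)           ! cell-boundary locations
   real(wp) :: x_cc(-buff:m_loc + buff)               ! cell-centre locations
   character(len=12) :: bc_beg
   integer :: i

   dx(0:m_loc) = 0.1_wp                               ! uniform interior grid
   x_cb(-1) = 0._wp
   do i = 0, m_loc
      x_cb(i) = x_cb(i - 1) + dx(i)
      x_cc(i) = x_cb(i - 1) + 0.5_wp*dx(i)
   end do

   bc_beg = 'periodic'                                ! 'extrap' | 'reflective' | 'periodic'

   ! Populate the cell-width buffer at the beginning of the direction,
   ! mirroring the three non-MPI branches of the deleted routine.
   select case (trim(bc_beg))
   case ('extrap')                                    ! ghost-cell extrapolation
      dx(-buff:-1) = dx(0)
   case ('reflective')                                ! symmetry about x_cb(-1)
      do i = 1, buff
         dx(-i) = dx(i - 1)
      end do
   case ('periodic')                                  ! wrap around the domain
      do i = 1, buff
         dx(-i) = dx(m_loc - (i - 1))
      end do
   end select

   ! Cell-boundary and cell-centre buffers follow directly from the widths,
   ! exactly as in the deleted loops.
   do i = 1, buff
      x_cb(-1 - i) = x_cb(-i) - dx(-i)
      x_cc(-i) = x_cc(1 - i) - (dx(1 - i) + dx(-i))/2._wp
   end do

   print *, 'buffer cell widths:     ', dx(-buff:-1)
   print *, 'buffer cell boundaries: ', x_cb(-1 - buff:-2)
   print *, 'buffer cell centres:    ', x_cc(-buff:-1)
end program demo_grid_buffers

In the routine itself the same branches are selected by comparing bc_<dir>%beg and bc_<dir>%end against BC_GHOST_EXTRAP, BC_REFLECTIVE and BC_PERIODIC, the y-direction additionally treats BC_AXIS like a reflective fill, and any remaining case falls through to the MPI grid-buffer exchange.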
@@ -1475,6 +1262,7 @@ contains call s_initialize_mpi_common_module() + call s_initialize_mpi_proxy_module() call s_initialize_variables_conversion_module() if (grid_geometry == 3) call s_initialize_fftw_module() call s_initialize_riemann_solvers_module() @@ -1681,6 +1469,7 @@ contains if (viscous) then call s_finalize_viscous_module() end if + call s_finalize_mpi_proxy_module() if (surface_tension) call s_finalize_surface_tension_module() if (bodyForces) call s_finalize_body_forces_module() diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 9a7f64775d..b1c338b5c9 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -34,8 +34,6 @@ module m_time_steppers use m_boundary_common - use m_boundary_conditions - use m_helper use m_sim_helpers @@ -941,7 +939,7 @@ contains elseif (bubbles_lagrange) then - call s_populate_variables_buffers(q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf, bc_type) + call s_populate_variables_buffers(bc_type, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf) call s_compute_bubble_EL_dynamics(q_prim_vf, stage) call s_transfer_data_to_tmp() call s_smear_voidfraction() diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index fc73ed0743..34cdc451fd 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -1392,7 +1392,7 @@ contains end if end if - if (bc_x%beg <= BC_GHOST_EXTRAPOLATION) then + if (bc_x%beg <= BC_GHOST_EXTRAP) then !$acc parallel loop collapse(2) gang vector default(present) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end @@ -1401,7 +1401,7 @@ contains end do end do end if - if (bc_x%end <= BC_GHOST_EXTRAPOLATION) then + if (bc_x%end <= BC_GHOST_EXTRAP) then !$acc parallel loop collapse(2) gang vector default(present) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end @@ -1411,7 +1411,7 @@ contains end do end if if (n > 0) then - if (bc_y%beg <= BC_GHOST_EXTRAPOLATION .and. bc_y%beg /= BC_NULL) then + if (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_NULL) then !$acc parallel loop collapse(2) gang vector default(present) do l = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(1)%beg, idwbuff(1)%end @@ -1420,7 +1420,7 @@ contains end do end do end if - if (bc_y%end <= BC_GHOST_EXTRAPOLATION) then + if (bc_y%end <= BC_GHOST_EXTRAP) then !$acc parallel loop collapse(2) gang vector default(present) do l = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(1)%beg, idwbuff(1)%end @@ -1430,7 +1430,7 @@ contains end do end if if (p > 0) then - if (bc_z%beg <= BC_GHOST_EXTRAPOLATION) then + if (bc_z%beg <= BC_GHOST_EXTRAP) then !$acc parallel loop collapse(2) gang vector default(present) do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end @@ -1440,7 +1440,7 @@ contains end do end do end if - if (bc_z%end <= BC_GHOST_EXTRAPOLATION) then + if (bc_z%end <= BC_GHOST_EXTRAP) then !$acc parallel loop collapse(2) gang vector default(present) do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 015ea87a3f..331479a5cb 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -272,12 +272,12 @@ contains ! any contributions from outside of the physical domain during ! 
the WENO reconstruction if (null_weights) then - if (bc_s%beg == BC_RIEMANN_EXTRAPOLATION) then + if (bc_s%beg == BC_RIEMANN_EXTRAP) then d_cbR_${XYZ}$ (1, 0) = 0._wp; d_cbR_${XYZ}$ (0, 0) = 1._wp d_cbL_${XYZ}$ (1, 0) = 0._wp; d_cbL_${XYZ}$ (0, 0) = 1._wp end if - if (bc_s%end == BC_RIEMANN_EXTRAPOLATION) then + if (bc_s%end == BC_RIEMANN_EXTRAP) then d_cbR_${XYZ}$ (0, s) = 0._wp; d_cbR_${XYZ}$ (1, s) = 1._wp d_cbL_${XYZ}$ (0, s) = 0._wp; d_cbL_${XYZ}$ (1, s) = 1._wp end if @@ -418,14 +418,14 @@ contains ! any contributions from outside of the physical domain during ! the WENO reconstruction if (null_weights) then - if (bc_s%beg == BC_RIEMANN_EXTRAPOLATION) then + if (bc_s%beg == BC_RIEMANN_EXTRAP) then d_cbR_${XYZ}$ (1:2, 0) = 0._wp; d_cbR_${XYZ}$ (0, 0) = 1._wp d_cbL_${XYZ}$ (1:2, 0) = 0._wp; d_cbL_${XYZ}$ (0, 0) = 1._wp d_cbR_${XYZ}$ (2, 1) = 0._wp; d_cbR_${XYZ}$ (:, 1) = d_cbR_${XYZ}$ (:, 1)/sum(d_cbR_${XYZ}$ (:, 1)) d_cbL_${XYZ}$ (2, 1) = 0._wp; d_cbL_${XYZ}$ (:, 1) = d_cbL_${XYZ}$ (:, 1)/sum(d_cbL_${XYZ}$ (:, 1)) end if - if (bc_s%end == BC_RIEMANN_EXTRAPOLATION) then + if (bc_s%end == BC_RIEMANN_EXTRAP) then d_cbR_${XYZ}$ (0, s - 1) = 0._wp; d_cbR_${XYZ}$ (:, s - 1) = d_cbR_${XYZ}$ (:, s - 1)/sum(d_cbR_${XYZ}$ (:, s - 1)) d_cbL_${XYZ}$ (0, s - 1) = 0._wp; d_cbL_${XYZ}$ (:, s - 1) = d_cbL_${XYZ}$ (:, s - 1)/sum(d_cbL_${XYZ}$ (:, s - 1)) d_cbR_${XYZ}$ (0:1, s) = 0._wp; d_cbR_${XYZ}$ (2, s) = 1._wp diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index 95714f8d5e..e50858b3b4 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -60,6 +60,7 @@ def analytic(self): 'Bx0': ParamType.REAL, 'relativity': ParamType.LOG, 'cont_damage': ParamType.LOG, + 'num_bc_patches': ParamType.INT, } PRE_PROCESS = COMMON.copy() @@ -97,7 +98,6 @@ def analytic(self): 'surface_tension': ParamType.LOG, 'elliptic_smoothing': ParamType.LOG, 'elliptic_smoothing_iters': ParamType.INT, - 'num_bc_patches': ParamType.INT, 'viscous': ParamType.LOG, 'bubbles_lagrange': ParamType.LOG, })
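In the m_weno.fpp hunks above, the null_weights option zeroes the d_cbL/d_cbR coefficients of candidate stencils that would draw on cells outside the physical domain at a BC_RIEMANN_EXTRAP end and renormalizes what remains (collapsing onto the single interior stencil at the boundary cell itself). The sketch below reproduces that renormalization step in isolation; it is a standalone illustration with example coefficient values, not MFC's d_cbL/d_cbR tables.

program demo_null_weights
   ! Illustrative sketch (not MFC source): zero the coefficients of candidate
   ! stencils that reach outside the physical domain and renormalize the rest,
   ! mirroring the null_weights branches of the m_weno.fpp hunks.
   implicit none
   integer, parameter :: wp = selected_real_kind(15)
   real(wp) :: d(0:2)

   ! Example coefficients for three candidate stencils (values are illustrative;
   ! the real ones come from MFC's d_cbL/d_cbR tables).
   d = [1._wp/10._wp, 6._wp/10._wp, 3._wp/10._wp]

   ! One cell away from the left end: only stencil 2 reaches outside, so drop it
   ! and renormalize, mirroring d_cbR(2, 1) = 0 followed by the division by sum().
   call s_null_and_renormalize(d, keep_up_to=1)
   print *, 'renormalized coefficients:', d

contains

   subroutine s_null_and_renormalize(d, keep_up_to)
      real(wp), intent(inout) :: d(0:)
      integer, intent(in) :: keep_up_to   ! highest stencil index kept
      d(keep_up_to + 1:) = 0._wp
      d = d/sum(d)
   end subroutine s_null_and_renormalize

end program demo_null_weights

Calling the helper with keep_up_to = 0 reproduces the boundary-cell case from the diff (all weight on the single interior stencil), while keep_up_to = 1 gives the renormalized pair used one cell in; the right end mirrors this with the stencil indices reversed.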