diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index e146c15af4..4087a0045e 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -17,12 +17,14 @@ module m_boundary_common use m_constants + use m_delay_file_access + + use m_compile_specific + implicit none type(scalar_field), dimension(:, :), allocatable :: bc_buffers - !$acc declare create(bc_buffers) - - real(wp) :: bcxb, bcxe, bcyb, bcye, bczb, bcze +!$acc declare create(bc_buffers) #ifdef MFC_MPI integer, dimension(1:3, -1:1) :: MPI_BC_TYPE_TYPE, MPI_BC_BUFFER_TYPE @@ -32,9 +34,15 @@ module m_boundary_common s_populate_variables_buffers, & s_create_mpi_types, & s_populate_capillary_buffers, & + s_write_serial_boundary_condition_files, & + s_write_parallel_boundary_condition_files, & + s_read_serial_boundary_condition_files, & + s_read_parallel_boundary_condition_files, & + s_assign_default_bc_type, & + s_populate_grid_variables_buffers, & s_finalize_boundary_common_module - public :: bc_buffers, bcxb, bcxe, bcyb, bcye, bczb, bcze + public :: bc_buffers #ifdef MFC_MPI public :: MPI_BC_TYPE_TYPE, MPI_BC_BUFFER_TYPE @@ -44,11 +52,8 @@ contains impure subroutine s_initialize_boundary_common_module() - bcxb = bc_x%beg; bcxe = bc_x%end; bcyb = bc_y%beg; bcye = bc_y%end; bczb = bc_z%beg; bcze = bc_z%end - @:ALLOCATE(bc_buffers(1:num_dims, -1:1)) -#ifndef MFC_POST_PROCESS if (bc_io) then @:ALLOCATE(bc_buffers(1, -1)%sf(1:sys_size, 0:n, 0:p)) @:ALLOCATE(bc_buffers(1, 1)%sf(1:sys_size, 0:n, 0:p)) @@ -64,66 +69,75 @@ contains end if end if end if -#endif end subroutine s_initialize_boundary_common_module !> The purpose of this procedure is to populate the buffers !! of the primitive variables, depending on the selected !! boundary conditions. - impure subroutine s_populate_variables_buffers(q_prim_vf, pb, mv, bc_type) + impure subroutine s_populate_variables_buffers(bc_type, q_prim_vf, pb, mv) type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv type(integer_field), dimension(1:num_dims, -1:1), intent(in) :: bc_type integer :: k, l ! Population of Buffers in x-direction - if (bcxb >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 1, -1) + if (bc_x%beg >= 0) then + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, -1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p do k = 0, n select case (int(bc_type(1, -1)%sf(0, k, l))) case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 1, -1, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, 1, -1, k, l) case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, pb, mv, 1, -1, k, l) + call s_symmetry(q_prim_vf, 1, -1, k, l, pb, mv) case (BC_PERIODIC) - call s_periodic(q_prim_vf, pb, mv, 1, -1, k, l) + call s_periodic(q_prim_vf, 1, -1, k, l, pb, mv) case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, pb, mv, 1, -1, k, l) + call s_slip_wall(q_prim_vf, 1, -1, k, l) case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, pb, mv, 1, -1, k, l) + call s_no_slip_wall(q_prim_vf, 1, -1, k, l) case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, pb, mv, 1, -1, k, l) + call s_dirichlet(q_prim_vf, 1, -1, k, l) end select + + if (qbmm .and. (.not. polytropic) .and. 
& + (bc_type(1, -1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(1, -1, k, l, pb, mv) + end if end do end do end if - if (bcxe >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 1, 1) + if (bc_x%end >= 0) then + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, 1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p do k = 0, n select case (int(bc_type(1, 1)%sf(0, k, l))) case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) ! Ghost-cell extrap. BC at end - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 1, 1, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, 1, 1, k, l) case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, pb, mv, 1, 1, k, l) + call s_symmetry(q_prim_vf, 1, 1, k, l, pb, mv) case (BC_PERIODIC) - call s_periodic(q_prim_vf, pb, mv, 1, 1, k, l) + call s_periodic(q_prim_vf, 1, 1, k, l, pb, mv) case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, pb, mv, 1, 1, k, l) + call s_slip_wall(q_prim_vf, 1, 1, k, l) case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, pb, mv, 1, 1, k, l) + call s_no_slip_wall(q_prim_vf, 1, 1, k, l) case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, pb, mv, 1, 1, k, l) + call s_dirichlet(q_prim_vf, 1, 1, k, l) end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(1, 1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(1, 1, k, l, pb, mv) + end if end do end do end if @@ -132,52 +146,63 @@ contains if (n == 0) return - if (bcyb >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 2, -1) + if (bc_y%beg >= 0) then + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, -1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p do k = -buff_size, m + buff_size select case (int(bc_type(2, -1)%sf(k, 0, l))) case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 2, -1, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, 2, -1, k, l) case (BC_AXIS) call s_axis(q_prim_vf, pb, mv, k, l) case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, pb, mv, 2, -1, k, l) + call s_symmetry(q_prim_vf, 2, -1, k, l, pb, mv) case (BC_PERIODIC) - call s_periodic(q_prim_vf, pb, mv, 2, -1, k, l) + call s_periodic(q_prim_vf, 2, -1, k, l, pb, mv) case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, pb, mv, 2, -1, k, l) + call s_slip_wall(q_prim_vf, 2, -1, k, l) case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, pb, mv, 2, -1, k, l) + call s_no_slip_wall(q_prim_vf, 2, -1, k, l) case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, pb, mv, 2, -1, k, l) + call s_dirichlet(q_prim_vf, 2, -1, k, l) end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(2, -1)%sf(k, 0, l) <= BC_GHOST_EXTRAP) .and. 
& + (bc_type(2, -1)%sf(k, 0, l) /= BC_AXIS)) then + call s_qbmm_extrapolation(2, -1, k, l, pb, mv) + end if end do end do end if - if (bcye >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 2, 1) + if (bc_y%end >= 0) then + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, 1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p do k = -buff_size, m + buff_size select case (int(bc_type(2, 1)%sf(k, 0, l))) case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 2, 1, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, 2, 1, k, l) case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, pb, mv, 2, 1, k, l) + call s_symmetry(q_prim_vf, 2, 1, k, l, pb, mv) case (BC_PERIODIC) - call s_periodic(q_prim_vf, pb, mv, 2, 1, k, l) + call s_periodic(q_prim_vf, 2, 1, k, l, pb, mv) case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, pb, mv, 2, 1, k, l) + call s_slip_wall(q_prim_vf, 2, 1, k, l) case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, pb, mv, 2, 1, k, l) + call s_no_slip_wall(q_prim_vf, 2, 1, k, l) case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, pb, mv, 2, 1, k, l) + call s_dirichlet(q_prim_vf, 2, 1, k, l) end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(2, 1)%sf(k, 0, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(2, 1, k, l, pb, mv) + end if end do end do end if @@ -186,50 +211,60 @@ contains if (p == 0) return - if (bczb >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 3, -1) + if (bc_z%beg >= 0) then + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, -1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (int(bc_type(3, -1)%sf(k, l, 0))) case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 3, -1, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, 3, -1, k, l) case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, pb, mv, 3, -1, k, l) + call s_symmetry(q_prim_vf, 3, -1, k, l, pb, mv) case (BC_PERIODIC) - call s_periodic(q_prim_vf, pb, mv, 3, -1, k, l) + call s_periodic(q_prim_vf, 3, -1, k, l, pb, mv) case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, pb, mv, 3, -1, k, l) + call s_slip_wall(q_prim_vf, 3, -1, k, l) case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, pb, mv, 3, -1, k, l) + call s_no_slip_wall(q_prim_vf, 3, -1, k, l) case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, pb, mv, 3, -1, k, l) + call s_dirichlet(q_prim_vf, 3, -1, k, l) end select + + if (qbmm .and. (.not. polytropic) .and. 
& + (bc_type(3, -1)%sf(k, l, 0) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(3, -1, k, l, pb, mv) + end if end do end do end if - if (bcze >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 3, 1) + if (bc_z%end >= 0) then + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, 1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (int(bc_type(3, 1)%sf(k, l, 0))) case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 3, 1, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, 3, 1, k, l) case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, pb, mv, 3, 1, k, l) + call s_symmetry(q_prim_vf, 3, 1, k, l, pb, mv) case (BC_PERIODIC) - call s_periodic(q_prim_vf, pb, mv, 3, 1, k, l) + call s_periodic(q_prim_vf, 3, 1, k, l, pb, mv) case (BC_SlIP_WALL) - call s_slip_wall(q_prim_vf, pb, mv, 3, 1, k, l) + call s_slip_wall(q_prim_vf, 3, 1, k, l) case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, pb, mv, 3, 1, k, l) + call s_no_slip_wall(q_prim_vf, 3, 1, k, l) case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, pb, mv, 3, 1, k, l) + call s_dirichlet(q_prim_vf, 3, 1, k, l) end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(3, 1)%sf(k, l, 0) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(3, 1, k, l, pb, mv) + end if end do end do end if @@ -237,23 +272,18 @@ contains end subroutine s_populate_variables_buffers - pure subroutine s_ghost_cell_extrapolation(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) + pure subroutine s_ghost_cell_extrapolation(q_prim_vf, bc_dir, bc_loc, k, l) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_ghost_cell_extrapolation #else !$acc routine seq #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l integer :: j, i - if (qbmm .and. .not. 
polytropic) then - call s_qbmm_extrapolation(pb, mv, bc_dir, bc_loc, k, l) - end if - if (bc_dir == 1) then !< x-direction if (bc_loc == -1) then !bc_x%beg do i = 1, sys_size @@ -306,14 +336,10 @@ contains end subroutine s_ghost_cell_extrapolation - pure subroutine s_symmetry(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_symmetry -#else + pure subroutine s_symmetry(q_prim_vf, bc_dir, bc_loc, k, l, pb, mv) !$acc routine seq -#endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l @@ -570,14 +596,10 @@ contains end subroutine s_symmetry - pure subroutine s_periodic(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_periodic -#else + pure subroutine s_periodic(q_prim_vf, bc_dir, bc_loc, k, l, pb, mv) !$acc routine seq -#endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l @@ -714,11 +736,7 @@ contains end subroutine s_periodic pure subroutine s_axis(q_prim_vf, pb, mv, k, l) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_axis -#else !$acc routine seq -#endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: k, l @@ -776,23 +794,18 @@ contains end subroutine s_axis - pure subroutine s_slip_wall(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) + pure subroutine s_slip_wall(q_prim_vf, bc_dir, bc_loc, k, l) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_slip_wall #else !$acc routine seq #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l integer :: j, i - if (qbmm .and. .not. polytropic) then - call s_qbmm_extrapolation(pb, mv, bc_dir, bc_loc, k, l) - end if - if (bc_dir == 1) then !< x-direction if (bc_loc == -1) then !< bc_x%beg do i = 1, sys_size @@ -875,23 +888,18 @@ contains end subroutine s_slip_wall - pure subroutine s_no_slip_wall(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) + pure subroutine s_no_slip_wall(q_prim_vf, bc_dir, bc_loc, k, l) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_no_slip_wall #else !$acc routine seq #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l integer :: j, i - if (qbmm .and. .not. 
polytropic) then - call s_qbmm_extrapolation(pb, mv, bc_dir, bc_loc, k, l) - end if - if (bc_dir == 1) then !< x-direction if (bc_loc == -1) then !< bc_x%beg do i = 1, sys_size @@ -1010,22 +1018,19 @@ contains end subroutine s_no_slip_wall - pure subroutine s_dirichlet(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) + pure subroutine s_dirichlet(q_prim_vf, bc_dir, bc_loc, k, l) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_dirichlet #else !$acc routine seq #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l integer :: j, i -#ifdef MFC_PRE_PROCESS - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 1, -1, k, l) -#else +#ifdef MFC_SIMULATION if (bc_dir == 1) then !< x-direction if (bc_loc == -1) then !bc_x%beg do i = 1, sys_size @@ -1075,17 +1080,15 @@ contains end do end if end if +#else + call s_ghost_cell_extrapolation(q_prim_vf, bc_dir, bc_loc, k, l) #endif end subroutine s_dirichlet - pure subroutine s_qbmm_extrapolation(pb, mv, bc_dir, bc_loc, k, l) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_qbmm_extrapolation -#else + pure subroutine s_qbmm_extrapolation(bc_dir, bc_loc, k, l, pb, mv) !$acc routine seq -#endif - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l @@ -1163,8 +1166,8 @@ contains integer :: k, l !< x-direction - if (bcxb >= 0) then - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 1, -1) + if (bc_x%beg >= 0) then + call s_mpi_sendrecv_variables_buffers(c_divs, 1, -1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p @@ -1181,8 +1184,8 @@ contains end do end if - if (bcxe >= 0) then - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 1, 1) + if (bc_x%end >= 0) then + call s_mpi_sendrecv_variables_buffers(c_divs, 1, 1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p @@ -1202,8 +1205,8 @@ contains if (n == 0) return !< y-direction - if (bcyb >= 0) then - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 2, -1) + if (bc_y%beg >= 0) then + call s_mpi_sendrecv_variables_buffers(c_divs, 2, -1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p @@ -1220,8 +1223,8 @@ contains end do end if - if (bcye >= 0) then - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 2, 1) + if (bc_y%end >= 0) then + call s_mpi_sendrecv_variables_buffers(c_divs, 2, 1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p @@ -1241,8 +1244,8 @@ contains if (p == 0) return !< z-direction - if (bczb >= 0) then - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 3, -1) + if (bc_z%beg >= 0) then + call s_mpi_sendrecv_variables_buffers(c_divs, 3, -1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) do l = -buff_size, n + buff_size @@ -1259,8 +1262,8 @@ contains end do end if - if (bcze >= 0) then - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 3, 1) + if (bc_z%end >= 0) then + call s_mpi_sendrecv_variables_buffers(c_divs, 3, 1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) do l = -buff_size, n + buff_size @@ -1509,9 +1512,469 @@ contains #endif 
end subroutine s_create_mpi_types - impure subroutine s_finalize_boundary_common_module() + subroutine s_write_serial_boundary_condition_files(q_prim_vf, bc_type, step_dirpath, old_grid) + + type(scalar_field), dimension(sys_size) :: q_prim_vf + type(integer_field), dimension(1:num_dims, -1:1) :: bc_type + logical :: old_grid + + character(LEN=*), intent(in) :: step_dirpath + + integer :: dir, loc, i + character(len=path_len) :: file_path + + character(len=10) :: status + + if (old_grid) then + status = 'old' + else + status = 'new' + end if + + call s_pack_boundary_condition_buffers(q_prim_vf) + + file_path = trim(step_dirpath)//'/bc_type.dat' + open (1, FILE=trim(file_path), FORM='unformatted', STATUS=status) + do dir = 1, num_dims + do loc = -1, 1, 2 + write (1) bc_type(dir, loc)%sf + end do + end do + close (1) + + file_path = trim(step_dirpath)//'/bc_buffers.dat' + open (1, FILE=trim(file_path), FORM='unformatted', STATUS=status) + do dir = 1, num_dims + do loc = -1, 1, 2 + write (1) bc_buffers(dir, loc)%sf + end do + end do + close (1) + + end subroutine s_write_serial_boundary_condition_files + + subroutine s_write_parallel_boundary_condition_files(q_prim_vf, bc_type) + + type(scalar_field), dimension(sys_size) :: q_prim_vf + type(integer_field), dimension(1:num_dims, -1:1) :: bc_type + + integer :: dir, loc + character(len=path_len) :: file_loc, file_path + + character(len=10) :: status + +#ifdef MFC_MPI + integer :: ierr + integer :: file_id + integer :: offset + character(len=7) :: proc_rank_str + logical :: dir_check + + call s_pack_boundary_condition_buffers(q_prim_vf) + + file_loc = trim(case_dir)//'/restart_data/boundary_conditions' + if (proc_rank == 0) then + call my_inquire(file_loc, dir_check) + if (dir_check .neqv. .true.) then + call s_create_directory(trim(file_loc)) + end if + end if + + call s_create_mpi_types(bc_type) + + call s_mpi_barrier() + + call DelayFileAccess(proc_rank) + + write (proc_rank_str, '(I7.7)') proc_rank + file_path = trim(file_loc)//'/bc_'//trim(proc_rank_str)//'.dat' + call MPI_File_open(MPI_COMM_SELF, trim(file_path), MPI_MODE_CREATE + MPI_MODE_WRONLY, MPI_INFO_NULL, file_id, ierr) + + offset = 0 + + ! Write bc_types + do dir = 1, num_dims + do loc = -1, 1, 2 + call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), MPI_INTEGER, MPI_BC_TYPE_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) + call MPI_File_write_all(file_id, bc_type(dir, loc)%sf, 1, MPI_BC_TYPE_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) + offset = offset + sizeof(bc_type(dir, loc)%sf) + end do + end do + + ! Write bc_buffers + do dir = 1, num_dims + do loc = -1, 1, 2 + call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_p, MPI_BC_BUFFER_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) + call MPI_File_write_all(file_id, bc_buffers(dir, loc)%sf, 1, MPI_BC_BUFFER_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) + offset = offset + sizeof(bc_buffers(dir, loc)%sf) + end do + end do + + call MPI_File_close(file_id, ierr) +#endif + + end subroutine s_write_parallel_boundary_condition_files + + subroutine s_read_serial_boundary_condition_files(step_dirpath, bc_type) + + character(LEN=*), intent(in) :: step_dirpath + + type(integer_field), dimension(1:num_dims, -1:1), intent(inout) :: bc_type + + integer :: dir, loc + logical :: file_exist + character(len=path_len) :: file_path + + character(len=10) :: status + + ! Read bc_types + file_path = trim(step_dirpath)//'/bc_type.dat' + inquire (FILE=trim(file_path), EXIST=file_exist) + if (.not. 
file_exist) then + call s_mpi_abort(trim(file_path)//' is missing. Exiting.') + end if + + open (1, FILE=trim(file_path), FORM='unformatted', STATUS='unknown') + do dir = 1, num_dims + do loc = -1, 1, 2 + read (1) bc_type(dir, loc)%sf + !$acc update device(bc_type(dir, loc)%sf) + end do + end do + close (1) + + ! Read bc_buffers + file_path = trim(step_dirpath)//'/bc_buffers.dat' + inquire (FILE=trim(file_path), EXIST=file_exist) + if (.not. file_exist) then + call s_mpi_abort(trim(file_path)//' is missing. Exiting.') + end if + + open (1, FILE=trim(file_path), FORM='unformatted', STATUS='unknown') + do dir = 1, num_dims + do loc = -1, 1, 2 + read (1) bc_buffers(dir, loc)%sf + !$acc update device(bc_buffers(dir, loc)%sf) + end do + end do + close (1) + + end subroutine s_read_serial_boundary_condition_files + + subroutine s_read_parallel_boundary_condition_files(bc_type) + + type(integer_field), dimension(1:num_dims, -1:1), intent(inout) :: bc_type + + integer :: dir, loc + character(len=path_len) :: file_loc, file_path + + character(len=10) :: status + +#ifdef MFC_MPI + integer :: ierr + integer :: file_id + integer :: offset + character(len=7) :: proc_rank_str + logical :: dir_check + + file_loc = trim(case_dir)//'/restart_data/boundary_conditions' + + if (proc_rank == 0) then + call my_inquire(file_loc, dir_check) + if (dir_check .neqv. .true.) then + call s_mpi_abort(trim(file_loc)//' is missing. Exiting.') + end if + end if + + call s_create_mpi_types(bc_type) + + call s_mpi_barrier() + + call DelayFileAccess(proc_rank) + + write (proc_rank_str, '(I7.7)') proc_rank + file_path = trim(file_loc)//'/bc_'//trim(proc_rank_str)//'.dat' + call MPI_File_open(MPI_COMM_SELF, trim(file_path), MPI_MODE_RDONLY, MPI_INFO_NULL, file_id, ierr) + + offset = 0 + + ! Read bc_types + do dir = 1, num_dims + do loc = -1, 1, 2 + call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), MPI_INTEGER, MPI_BC_TYPE_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) + call MPI_File_read_all(file_id, bc_type(dir, loc)%sf, 1, MPI_BC_TYPE_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) + offset = offset + sizeof(bc_type(dir, loc)%sf) + !$acc update device(bc_type(dir, loc)%sf) + end do + end do + + ! 
Read bc_buffers + do dir = 1, num_dims + do loc = -1, 1, 2 + call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_p, MPI_BC_BUFFER_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) + call MPI_File_read_all(file_id, bc_buffers(dir, loc)%sf, 1, MPI_BC_BUFFER_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) + offset = offset + sizeof(bc_buffers(dir, loc)%sf) + !$acc update device(bc_buffers(dir, loc)%sf) + end do + end do + + call MPI_File_close(file_id, ierr) +#endif + + end subroutine s_read_parallel_boundary_condition_files + + subroutine s_pack_boundary_condition_buffers(q_prim_vf) + + type(scalar_field), dimension(sys_size) :: q_prim_vf + integer :: i, j, k + + do k = 0, p + do j = 0, n + do i = 1, sys_size + bc_buffers(1, -1)%sf(i, j, k) = q_prim_vf(i)%sf(0, j, k) + bc_buffers(1, 1)%sf(i, j, k) = q_prim_vf(i)%sf(m, j, k) + end do + end do + end do + + if (n > 0) then + do k = 0, p + do j = 1, sys_size + do i = 0, m + bc_buffers(2, -1)%sf(i, j, k) = q_prim_vf(j)%sf(i, 0, k) + bc_buffers(2, 1)%sf(i, j, k) = q_prim_vf(j)%sf(i, n, k) + end do + end do + end do + + if (p > 0) then + do k = 1, sys_size + do j = 0, n + do i = 0, m + bc_buffers(3, -1)%sf(i, j, k) = q_prim_vf(k)%sf(i, j, 0) + bc_buffers(3, 1)%sf(i, j, k) = q_prim_vf(k)%sf(i, j, p) + end do + end do + end do + end if + end if + + end subroutine s_pack_boundary_condition_buffers + + subroutine s_assign_default_bc_type(bc_type) + + type(integer_field), dimension(1:num_dims, -1:1), intent(in) :: bc_type + + bc_type(1, -1)%sf(:, :, :) = bc_x%beg + bc_type(1, 1)%sf(:, :, :) = bc_x%end + !$acc update device(bc_type(1,-1)%sf, bc_type(1,1)%sf) + + if (n > 0) then + bc_type(2, -1)%sf(:, :, :) = bc_y%beg + bc_type(2, 1)%sf(:, :, :) = bc_y%end + !$acc update device(bc_type(2,-1)%sf, bc_type(2,1)%sf) + + if (p > 0) then + bc_type(3, -1)%sf(:, :, :) = bc_z%beg + bc_type(3, 1)%sf(:, :, :) = bc_z%end + !$acc update device(bc_type(3,-1)%sf, bc_type(3,1)%sf) + end if + end if + + end subroutine s_assign_default_bc_type + + !> The purpose of this subroutine is to populate the buffers + !! of the grid variables, which are constituted of the cell- + !! boundary locations and cell-width distributions, based on + !! the boundary conditions. + subroutine s_populate_grid_variables_buffers + + integer :: i !< Generic loop iterator + +#ifdef MFC_SIMULATION + ! Required for compatibility between codes + type(int_bounds_info) :: offset_x, offset_y, offset_z + offset_x%beg = buff_size; offset_x%end = buff_size + offset_y%beg = buff_size; offset_y%end = buff_size + offset_z%beg = buff_size; offset_z%end = buff_size +#endif + +#ifndef MFC_PRE_PROCESS + ! Population of Buffers in x-direction + + ! Populating cell-width distribution buffer at bc_x%beg + if (bc_x%beg >= 0) then + call s_mpi_sendrecv_grid_variables_buffers(1, -1) + elseif (bc_x%beg <= BC_GHOST_EXTRAP) then + do i = 1, buff_size + dx(-i) = dx(0) + end do + elseif (bc_x%beg == BC_REFLECTIVE) then + do i = 1, buff_size + dx(-i) = dx(i - 1) + end do + elseif (bc_x%beg == BC_PERIODIC) then + do i = 1, buff_size + dx(-i) = dx(m - (i - 1)) + end do + end if + + ! Computing the cell-boundary and center locations buffer at bc_x%beg + do i = 1, offset_x%beg + x_cb(-1 - i) = x_cb(-i) - dx(-i) + end do + + do i = 1, buff_size + x_cc(-i) = x_cc(1 - i) - (dx(1 - i) + dx(-i))/2._wp + end do + + ! 
Populating the cell-width distribution buffer at bc_x%end + if (bc_x%end >= 0) then + call s_mpi_sendrecv_grid_variables_buffers(1, 1) + elseif (bc_x%end <= BC_GHOST_EXTRAP) then + do i = 1, buff_size + dx(m + i) = dx(m) + end do + elseif (bc_x%end == BC_REFLECTIVE) then + do i = 1, buff_size + dx(m + i) = dx(m - (i - 1)) + end do + elseif (bc_x%end == BC_PERIODIC) then + do i = 1, buff_size + dx(m + i) = dx(i - 1) + end do + end if + + ! Populating the cell-boundary and center locations buffer at bc_x%end + do i = 1, offset_x%end + x_cb(m + i) = x_cb(m + (i - 1)) + dx(m + i) + end do + + do i = 1, buff_size + x_cc(m + i) = x_cc(m + (i - 1)) + (dx(m + (i - 1)) + dx(m + i))/2._wp + end do + ! END: Population of Buffers in x-direction + + ! Population of Buffers in y-direction + + ! Populating cell-width distribution buffer at bc_y%beg + if (n == 0) then + return + elseif (bc_y%beg >= 0) then + call s_mpi_sendrecv_grid_variables_buffers(2, -1) + elseif (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_AXIS) then + do i = 1, buff_size + dy(-i) = dy(0) + end do + elseif (bc_y%beg == BC_REFLECTIVE .or. bc_y%beg == BC_AXIS) then + do i = 1, buff_size + dy(-i) = dy(i - 1) + end do + elseif (bc_y%beg == BC_PERIODIC) then + do i = 1, buff_size + dy(-i) = dy(n - (i - 1)) + end do + end if + + ! Computing the cell-boundary and center locations buffer at bc_y%beg + do i = 1, offset_y%beg + y_cb(-1 - i) = y_cb(-i) - dy(-i) + end do + + do i = 1, buff_size + y_cc(-i) = y_cc(1 - i) - (dy(1 - i) + dy(-i))/2._wp + end do + + ! Populating the cell-width distribution buffer at bc_y%end + if (bc_y%end >= 0) then + call s_mpi_sendrecv_grid_variables_buffers(2, 1) + elseif (bc_y%end <= BC_GHOST_EXTRAP) then + do i = 1, buff_size + dy(n + i) = dy(n) + end do + elseif (bc_y%end == BC_REFLECTIVE) then + do i = 1, buff_size + dy(n + i) = dy(n - (i - 1)) + end do + elseif (bc_y%end == BC_PERIODIC) then + do i = 1, buff_size + dy(n + i) = dy(i - 1) + end do + end if + + ! Populating the cell-boundary and center locations buffer at bc_y%end + do i = 1, offset_y%end + y_cb(n + i) = y_cb(n + (i - 1)) + dy(n + i) + end do + + do i = 1, buff_size + y_cc(n + i) = y_cc(n + (i - 1)) + (dy(n + (i - 1)) + dy(n + i))/2._wp + end do + ! END: Population of Buffers in y-direction + + ! Population of Buffers in z-direction + + ! Populating cell-width distribution buffer at bc_z%beg + if (p == 0) then + return + elseif (Bc_z%beg >= 0) then + call s_mpi_sendrecv_grid_variables_buffers(3, -1) + elseif (bc_z%beg <= BC_GHOST_EXTRAP) then + do i = 1, buff_size + dz(-i) = dz(0) + end do + elseif (bc_z%beg == BC_REFLECTIVE) then + do i = 1, buff_size + dz(-i) = dz(i - 1) + end do + elseif (bc_z%beg == BC_PERIODIC) then + do i = 1, buff_size + dz(-i) = dz(p - (i - 1)) + end do + end if + + ! Computing the cell-boundary and center locations buffer at bc_z%beg + do i = 1, offset_z%beg + z_cb(-1 - i) = z_cb(-i) - dz(-i) + end do + + do i = 1, buff_size + z_cc(-i) = z_cc(1 - i) - (dz(1 - i) + dz(-i))/2._wp + end do + + ! Populating the cell-width distribution buffer at bc_z%end + if (bc_z%end >= 0) then + call s_mpi_sendrecv_grid_variables_buffers(3, 1) + elseif (bc_z%end <= BC_GHOST_EXTRAP) then + do i = 1, buff_size + dz(p + i) = dz(p) + end do + elseif (bc_z%end == BC_REFLECTIVE) then + do i = 1, buff_size + dz(p + i) = dz(p - (i - 1)) + end do + elseif (bc_z%end == BC_PERIODIC) then + do i = 1, buff_size + dz(p + i) = dz(i - 1) + end do + end if + + ! 
Populating the cell-boundary and center locations buffer at bc_z%end + do i = 1, offset_z%end + z_cb(p + i) = z_cb(p + (i - 1)) + dz(p + i) + end do + + do i = 1, buff_size + z_cc(p + i) = z_cc(p + (i - 1)) + (dz(p + (i - 1)) + dz(p + i))/2._wp + end do + ! END: Population of Buffers in z-direction + +#endif + + end subroutine s_populate_grid_variables_buffers + + subroutine s_finalize_boundary_common_module() -#ifndef MFC_POST_PROCESS if (bc_io) then deallocate (bc_buffers(1, -1)%sf) deallocate (bc_buffers(1, 1)%sf) @@ -1524,7 +1987,7 @@ contains end if end if end if -#endif + deallocate (bc_buffers) end subroutine s_finalize_boundary_common_module diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index b439ce1c6d..4a96787e15 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -24,24 +24,24 @@ module m_mpi_common implicit none - integer, private :: err_code, ierr, v_size !< + integer, private :: ierr, v_size !< !$acc declare create(v_size) !! Generic flags used to identify and report MPI errors - real(wp), private, allocatable, dimension(:), target :: buff_send !< + real(wp), private, allocatable, dimension(:) :: buff_send !< !! This variable is utilized to pack and send the buffer of the cell-average !! primitive variables, for a single computational domain boundary at the !! time, to the relevant neighboring processor. - real(wp), private, allocatable, dimension(:), target :: buff_recv !< + real(wp), private, allocatable, dimension(:) :: buff_recv !< !! buff_recv is utilized to receive and unpack the buffer of the cell- !! average primitive variables, for a single computational domain boundary !! at the time, from the relevant neighboring processor. !$acc declare create(buff_send, buff_recv) - integer :: halo_size, nVars - !$acc declare create(halo_size, nVars) + integer :: halo_size + !$acc declare create(halo_size) contains @@ -51,46 +51,34 @@ contains impure subroutine s_initialize_mpi_common_module #ifdef MFC_MPI - ! Allocating buff_send/recv and ib_buff_send/recv. Please note that - ! for the sake of simplicity, both variables are provided sufficient - ! storage to hold the largest buffer in the computational domain. + ! Allocating buff_send/recv and. Please note that for the sake of + ! simplicity, both variables are provided sufficient storage to hold + ! the largest buffer in the computational domain. + if (qbmm .and. .not. 
polytropic) then - if (n > 0) then - if (p > 0) then - halo_size = -1 + buff_size*(sys_size + 2*nb*4)* & - & (m + 2*buff_size + 1)* & - & (n + 2*buff_size + 1)* & - & (p + 2*buff_size + 1)/ & - & (min(m, n, p) + 2*buff_size + 1) - else - halo_size = -1 + buff_size*(sys_size + 2*nb*4)* & - & (max(m, n) + 2*buff_size + 1) - end if - else - halo_size = -1 + buff_size*(sys_size + 2*nb*4) - end if + v_size = sys_size + 2*nb*4 else - if (n > 0) then - if (p > 0) then - halo_size = -1 + buff_size*sys_size* & - & (m + 2*buff_size + 1)* & - & (n + 2*buff_size + 1)* & - & (p + 2*buff_size + 1)/ & - & (min(m, n, p) + 2*buff_size + 1) - else - halo_size = -1 + buff_size*sys_size* & - & (max(m, n) + 2*buff_size + 1) - end if + v_size = sys_size + end if + + if (n > 0) then + if (p > 0) then + halo_size = nint(-1._wp + 1._wp*buff_size*(v_size)* & + & (m + 2*buff_size + 1)* & + & (n + 2*buff_size + 1)* & + & (p + 2*buff_size + 1)/ & + & (min(m, n, p) + 2*buff_size + 1)) else - halo_size = -1 + buff_size*sys_size + halo_size = -1 + buff_size*(v_size)* & + & (max(m, n) + 2*buff_size + 1) end if + else + halo_size = -1 + buff_size*(v_size) end if - v_size = sys_size + !$acc update device(halo_size, v_size) - allocate (buff_send(0:halo_size)) - - allocate (buff_recv(0:ubound(buff_send, 1))) + @:ALLOCATE(buff_send(0:halo_size), buff_recv(0:halo_size)) #endif end subroutine s_initialize_mpi_common_module @@ -135,24 +123,11 @@ contains !! @param beta Eulerian void fraction from lagrangian bubbles impure subroutine s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm, beta) - type(scalar_field), & - dimension(sys_size), & - intent(in) :: q_cons_vf - - type(integer_field), & - optional, & - intent(in) :: ib_markers - - type(levelset_field), & - optional, & - intent(IN) :: levelset - - type(levelset_norm_field), & - optional, & - intent(IN) :: levelset_norm - - type(scalar_field), & - intent(in), optional :: beta + type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf + type(integer_field), optional, intent(in) :: ib_markers + type(levelset_field), optional, intent(IN) :: levelset + type(levelset_norm_field), optional, intent(IN) :: levelset_norm + type(scalar_field), intent(in), optional :: beta integer, dimension(num_dims) :: sizes_glb, sizes_loc integer, dimension(1) :: airfoil_glb, airfoil_loc, airfoil_start @@ -611,15 +586,15 @@ contains !! @param q_cons_vf Cell-average conservative variables !! @param mpi_dir MPI communication coordinate direction !! 
@param pbc_loc Processor boundary condition (PBC) location - subroutine s_mpi_sendrecv_variables_buffers(q_cons_vf, & - pb, mv, & + subroutine s_mpi_sendrecv_variables_buffers(q_comm, & mpi_dir, & - pbc_loc) - - type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + pbc_loc, & + nVar, & + pb, mv) - integer, intent(in) :: mpi_dir, pbc_loc + type(scalar_field), dimension(1:), intent(inout) :: q_comm + real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + integer, intent(in) :: mpi_dir, pbc_loc, nVar integer :: i, j, k, l, r, q !< Generic loop iterators @@ -629,7 +604,7 @@ contains integer :: beg_end(1:2), grid_dims(1:3) integer :: dst_proc, src_proc, recv_tag, send_tag - logical :: beg_end_geq_0 + logical :: beg_end_geq_0, qbmm_comm integer :: pack_offset, unpack_offset @@ -638,25 +613,27 @@ contains #ifdef MFC_MPI call nvtxStartRange("RHS-COMM-PACKBUF") -!$acc update device(v_size) -#ifdef MFC_SIMULATION - if (qbmm .and. .not. polytropic) then + qbmm_comm = .false. + + if (present(pb) .and. present(mv) .and. qbmm .and. .not. polytropic) then + qbmm_comm = .true. + v_size = nVar + 2*nb*4 buffer_counts = (/ & - buff_size*(sys_size + 2*nb*4)*(n + 1)*(p + 1), & - buff_size*(sys_size + 2*nb*4)*(m + 2*buff_size + 1)*(p + 1), & + buff_size*v_size*(n + 1)*(p + 1), & + buff_size*v_size*(m + 2*buff_size + 1)*(p + 1), & buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & /) else -#endif + v_size = nVar buffer_counts = (/ & - buff_size*sys_size*(n + 1)*(p + 1), & - buff_size*sys_size*(m + 2*buff_size + 1)*(p + 1), & + buff_size*v_size*(n + 1)*(p + 1), & + buff_size*v_size*(m + 2*buff_size + 1)*(p + 1), & buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & /) -#ifdef MFC_SIMULATION end if -#endif + + !$acc update device(v_size) buffer_count = buffer_counts(mpi_dir) boundary_conditions = (/bc_x, bc_y, bc_z/) @@ -696,25 +673,24 @@ contains do l = 0, p do k = 0, n do j = 0, buff_size - 1 - do i = 1, sys_size + do i = 1, nVar r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) - buff_send(r) = q_cons_vf(i)%sf(j + pack_offset, k, l) + buff_send(r) = q_comm(i)%sf(j + pack_offset, k, l) end do end do end do end do -#ifdef MFC_SIMULATION - if (qbmm .and. .not. 
polytropic) then + if (qbmm_comm) then !$acc parallel loop collapse(4) gang vector default(present) private(r) do l = 0, p do k = 0, n do j = 0, buff_size - 1 - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do q = 1, nb r = (i - 1) + (q - 1)*4 + v_size* & (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = pb(j + pack_offset, k, l, i - sys_size, q) + buff_send(r) = pb(j + pack_offset, k, l, i - nVar, q) end do end do end do @@ -725,37 +701,35 @@ contains do l = 0, p do k = 0, n do j = 0, buff_size - 1 - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do q = 1, nb r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = mv(j + pack_offset, k, l, i - sys_size, q) + buff_send(r) = mv(j + pack_offset, k, l, i - nVar, q) end do end do end do end do end do end if -#endif #:elif mpi_dir == 2 !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, sys_size + do i = 1, nVar do l = 0, p do k = 0, buff_size - 1 do j = -buff_size, m + buff_size r = (i - 1) + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & (k + buff_size*l)) - buff_send(r) = q_cons_vf(i)%sf(j, k + pack_offset, l) + buff_send(r) = q_comm(i)%sf(j, k + pack_offset, l) end do end do end do end do -#ifdef MFC_SIMULATION - if (qbmm .and. .not. polytropic) then + if (qbmm_comm) then !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = 0, p do k = 0, buff_size - 1 do j = -buff_size, m + buff_size @@ -763,7 +737,7 @@ contains r = (i - 1) + (q - 1)*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & (k + buff_size*l)) - buff_send(r) = pb(j, k + pack_offset, l, i - sys_size, q) + buff_send(r) = pb(j, k + pack_offset, l, i - nVar, q) end do end do end do @@ -771,7 +745,7 @@ contains end do !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = 0, p do k = 0, buff_size - 1 do j = -buff_size, m + buff_size @@ -779,33 +753,31 @@ contains r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & (k + buff_size*l)) - buff_send(r) = mv(j, k + pack_offset, l, i - sys_size, q) + buff_send(r) = mv(j, k + pack_offset, l, i - nVar, q) end do end do end do end do end do end if -#endif #:else !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, sys_size + do i = 1, nVar do l = 0, buff_size - 1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size r = (i - 1) + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = q_cons_vf(i)%sf(j, k, l + pack_offset) + buff_send(r) = q_comm(i)%sf(j, k, l + pack_offset) end do end do end do end do -#ifdef MFC_SIMULATION - if (qbmm .and. .not. 
polytropic) then + if (qbmm_comm) then !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size @@ -813,7 +785,7 @@ contains r = (i - 1) + (q - 1)*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = pb(j, k, l + pack_offset, i - sys_size, q) + buff_send(r) = pb(j, k, l + pack_offset, i - nVar, q) end do end do end do @@ -821,7 +793,7 @@ contains end do !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size @@ -829,29 +801,24 @@ contains r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = mv(j, k, l + pack_offset, i - sys_size, q) + buff_send(r) = mv(j, k, l + pack_offset, i - nVar, q) end do end do end do end do end do end if -#endif #:endif end if #:endfor call nvtxEndRange ! Packbuf - p_send => buff_send(0) - p_recv => buff_recv(0) - ! Send/Recv #ifdef MFC_SIMULATION #:for rdma_mpi in [False, True] if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then #:if rdma_mpi - !$acc data attach(p_send, p_recv) - !$acc host_data use_device(p_send, p_recv) + !$acc host_data use_device(buff_send, buff_recv) call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") #:else call nvtxStartRange("RHS-COMM-DEV2HOST") @@ -861,15 +828,14 @@ contains #:endif call MPI_SENDRECV( & - p_send, buffer_count, mpi_p, dst_proc, send_tag, & - p_recv, buffer_count, mpi_p, src_proc, recv_tag, & + buff_send, buffer_count, mpi_p, dst_proc, send_tag, & + buff_recv, buffer_count, mpi_p, src_proc, recv_tag, & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA #:if rdma_mpi !$acc end host_data - !$acc end data !$acc wait #:else call nvtxStartRange("RHS-COMM-HOST2DEV") @@ -880,8 +846,8 @@ contains #:endfor #else call MPI_SENDRECV( & - p_send, buffer_count, mpi_p, dst_proc, send_tag, & - p_recv, buffer_count, mpi_p, src_proc, recv_tag, & + buff_send, buffer_count, mpi_p, dst_proc, send_tag, & + buff_recv, buffer_count, mpi_p, src_proc, recv_tag, & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) #endif @@ -894,12 +860,12 @@ contains do l = 0, p do k = 0, n do j = -buff_size, -1 - do i = 1, sys_size + do i = 1, nVar r = (i - 1) + v_size* & (j + buff_size*((k + 1) + (n + 1)*l)) - q_cons_vf(i)%sf(j + unpack_offset, k, l) = buff_recv(r) + q_comm(i)%sf(j + unpack_offset, k, l) = buff_recv(r) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then print *, "Error", j, k, l, i error stop "NaN(s) in recv" end if @@ -909,17 +875,16 @@ contains end do end do -#ifdef MFC_SIMULATION - if (qbmm .and. .not. 
polytropic) then + if (qbmm_comm) then !$acc parallel loop collapse(5) gang vector default(present) private(r) do l = 0, p do k = 0, n do j = -buff_size, -1 - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do q = 1, nb r = (i - 1) + (q - 1)*4 + v_size* & (j + buff_size*((k + 1) + (n + 1)*l)) - pb(j + unpack_offset, k, l, i - sys_size, q) = buff_recv(r) + pb(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) end do end do end do @@ -930,30 +895,29 @@ contains do l = 0, p do k = 0, n do j = -buff_size, -1 - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do q = 1, nb r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & (j + buff_size*((k + 1) + (n + 1)*l)) - mv(j + unpack_offset, k, l, i - sys_size, q) = buff_recv(r) + mv(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) end do end do end do end do end do end if -#endif #:elif mpi_dir == 2 !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, sys_size + do i = 1, nVar do l = 0, p do k = -buff_size, -1 do j = -buff_size, m + buff_size r = (i - 1) + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + buff_size*l)) - q_cons_vf(i)%sf(j, k + unpack_offset, l) = buff_recv(r) + q_comm(i)%sf(j, k + unpack_offset, l) = buff_recv(r) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then print *, "Error", j, k, l, i error stop "NaN(s) in recv" end if @@ -963,10 +927,9 @@ contains end do end do -#ifdef MFC_SIMULATION - if (qbmm .and. .not. polytropic) then + if (qbmm_comm) then !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = 0, p do k = -buff_size, -1 do j = -buff_size, m + buff_size @@ -974,7 +937,7 @@ contains r = (i - 1) + (q - 1)*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + buff_size*l)) - pb(j, k + unpack_offset, l, i - sys_size, q) = buff_recv(r) + pb(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) end do end do end do @@ -982,7 +945,7 @@ contains end do !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = 0, p do k = -buff_size, -1 do j = -buff_size, m + buff_size @@ -990,18 +953,17 @@ contains r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + buff_size*l)) - mv(j, k + unpack_offset, l, i - sys_size, q) = buff_recv(r) + mv(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) end do end do end do end do end do end if -#endif #:else ! Unpacking buffer from bc_z%beg !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, sys_size + do i = 1, nVar do l = -buff_size, -1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size @@ -1009,9 +971,9 @@ contains ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)* & (l + buff_size))) - q_cons_vf(i)%sf(j, k, l + unpack_offset) = buff_recv(r) + q_comm(i)%sf(j, k, l + unpack_offset) = buff_recv(r) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then print *, "Error", j, k, l, i error stop "NaN(s) in recv" end if @@ -1021,10 +983,9 @@ contains end do end do -#ifdef MFC_SIMULATION - if (qbmm .and. .not. 
polytropic) then + if (qbmm_comm) then !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = -buff_size, -1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size @@ -1033,7 +994,7 @@ contains ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)* & (l + buff_size))) - pb(j, k, l + unpack_offset, i - sys_size, q) = buff_recv(r) + pb(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) end do end do end do @@ -1041,7 +1002,7 @@ contains end do !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = -buff_size, -1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size @@ -1050,246 +1011,682 @@ contains ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)* & (l + buff_size))) - mv(j, k, l + unpack_offset, i - sys_size, q) = buff_recv(r) + mv(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) end do end do end do end do end do end if -#endif #:endif end if #:endfor call nvtxEndRange - #endif end subroutine s_mpi_sendrecv_variables_buffers - subroutine s_mpi_sendrecv_capilary_variables_buffers(c_divs_vf, mpi_dir, pbc_loc) + !> The purpose of this procedure is to optimally decompose + !! the computational domain among the available processors. + !! This is performed by attempting to award each processor, + !! in each of the coordinate directions, approximately the + !! same number of cells, and then recomputing the affected + !! global parameters. + subroutine s_mpi_decompose_computational_domain - type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs_vf - integer, intent(in) :: mpi_dir, pbc_loc +#ifdef MFC_MPI - integer :: i, j, k, l, r !< Generic loop iterators + integer :: num_procs_x, num_procs_y, num_procs_z !< + !! Optimal number of processors in the x-, y- and z-directions - integer :: buffer_counts(1:3), buffer_count + real(wp) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z !< + !! Non-optimal number of processors in the x-, y- and z-directions - type(int_bounds_info) :: boundary_conditions(1:3) - integer :: beg_end(1:2), grid_dims(1:3) - integer :: dst_proc, src_proc, recv_tag, send_tag + real(wp) :: fct_min !< + !! Processor factorization (fct) minimization parameter - logical :: beg_end_geq_0 + integer :: MPI_COMM_CART !< + !! Cartesian processor topology communicator - integer :: pack_offset, unpack_offset - real(wp), pointer :: p_send, p_recv + integer :: rem_cells !< + !! Remaining number of cells, in a particular coordinate direction, + !! after the majority is divided up among the available processors -#ifdef MFC_MPI + integer :: i, j !< Generic loop iterators - nVars = num_dims + 1 - !$acc update device(nVars) + if (num_procs == 1 .and. parallel_io) then + do i = 1, num_dims + start_idx(i) = 0 + end do + return + end if - buffer_counts = (/ & - buff_size*nVars*(n + 1)*(p + 1), & - buff_size*nVars*(m + 2*buff_size + 1)*(p + 1), & - buff_size*nVars*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & - /) + ! 3D Cartesian Processor Topology + if (n > 0) then - buffer_count = buffer_counts(mpi_dir) - boundary_conditions = (/bc_x, bc_y, bc_z/) - beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) - beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 + if (p > 0) then - ! Implements: - ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc - ! 
-1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] - ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] - ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] - ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] + if (cyl_coord .and. p > 0) then + ! Implement pencil processor blocking if using cylindrical coordinates so + ! that all cells in azimuthal direction are stored on a single processor. + ! This is necessary for efficient application of Fourier filter near axis. - send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) - recv_tag = f_logical_to_int(pbc_loc == 1) + ! Initial values of the processor factorization optimization + num_procs_x = 1 + num_procs_y = num_procs + num_procs_z = 1 + ierr = -1 - dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) - src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) + ! Computing minimization variable for these initial values + tmp_num_procs_x = num_procs_x + tmp_num_procs_y = num_procs_y + tmp_num_procs_z = num_procs_z + fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) - grid_dims = (/m, n, p/) + ! Searching for optimal computational domain distribution + do i = 1, num_procs - pack_offset = 0 - if (f_xor(pbc_loc == 1, beg_end_geq_0)) then - pack_offset = grid_dims(mpi_dir) - buff_size + 1 - end if + if (mod(num_procs, i) == 0 & + .and. & + (m + 1)/i >= num_stcls_min*weno_order) then - unpack_offset = 0 - if (pbc_loc == 1) then - unpack_offset = grid_dims(mpi_dir) + buff_size + 1 - end if + tmp_num_procs_x = i + tmp_num_procs_y = num_procs/i - ! Pack Buffer to Send - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, nVars - r = (i - 1) + nVars*(j + buff_size*(k + (n + 1)*l)) - buff_send(r) = c_divs_vf(i)%sf(j + pack_offset, k, l) - end do - end do - end do - end do + if (fct_min >= abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + .and. & + (n + 1)/tmp_num_procs_y & + >= & + num_stcls_min*weno_order) then + + num_procs_x = i + num_procs_y = num_procs/i + fct_min = abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + ierr = 0 + + end if + + end if - #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = c_divs_vf(i)%sf(j, k + pack_offset, l) - end do - end do - end do end do - #:else - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = c_divs_vf(i)%sf(j, k, l + pack_offset) - end do + else + + ! Initial estimate of optimal processor topology + num_procs_x = 1 + num_procs_y = 1 + num_procs_z = num_procs + ierr = -1 + + ! Benchmarking the quality of this initial guess + tmp_num_procs_x = num_procs_x + tmp_num_procs_y = num_procs_y + tmp_num_procs_z = num_procs_z + fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + + 10._wp*abs((n + 1)/tmp_num_procs_y & + - (p + 1)/tmp_num_procs_z) + + ! 
Optimization of the initial processor topology + do i = 1, num_procs + + if (mod(num_procs, i) == 0 & + .and. & + (m + 1)/i >= num_stcls_min*weno_order) then + + do j = 1, num_procs/i + + if (mod(num_procs/i, j) == 0 & + .and. & + (n + 1)/j >= num_stcls_min*weno_order) then + + tmp_num_procs_x = i + tmp_num_procs_y = j + tmp_num_procs_z = num_procs/(i*j) + + if (fct_min >= abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + + abs((n + 1)/tmp_num_procs_y & + - (p + 1)/tmp_num_procs_z) & + .and. & + (p + 1)/tmp_num_procs_z & + >= & + num_stcls_min*weno_order) & + then + + num_procs_x = i + num_procs_y = j + num_procs_z = num_procs/(i*j) + fct_min = abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + + abs((n + 1)/tmp_num_procs_y & + - (p + 1)/tmp_num_procs_z) + ierr = 0 + + end if + + end if + end do - end do + + end if + end do - #:endif + + end if + + ! Verifying that a valid decomposition of the computational + ! domain has been established. If not, the simulation exits. + if (proc_rank == 0 .and. ierr == -1) then + call s_mpi_abort('Unsupported combination of values '// & + 'of num_procs, m, n, p and '// & + 'weno_order. Exiting.') + end if + + ! Creating new communicator using the Cartesian topology + call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, & + num_procs_y, num_procs_z/), & + (/.true., .true., .true./), & + .false., MPI_COMM_CART, ierr) + + ! Finding the Cartesian coordinates of the local process + call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, & + proc_coords, ierr) + ! END: 3D Cartesian Processor Topology + + ! Global Parameters for z-direction + + ! Number of remaining cells + rem_cells = mod(p + 1, num_procs_z) + + ! Optimal number of cells per processor + p = (p + 1)/num_procs_z - 1 + + ! Distributing the remaining cells + do i = 1, rem_cells + if (proc_coords(3) == i - 1) then + p = p + 1; exit + end if + end do + + ! Boundary condition at the beginning + if (proc_coords(3) > 0 .or. (bc_z%beg == BC_PERIODIC .and. num_procs_z > 1)) then + proc_coords(3) = proc_coords(3) - 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & + bc_z%beg, ierr) + proc_coords(3) = proc_coords(3) + 1 + end if + + ! Boundary condition at the end + if (proc_coords(3) < num_procs_z - 1 .or. (bc_z%end == BC_PERIODIC .and. num_procs_z > 1)) then + proc_coords(3) = proc_coords(3) + 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & + bc_z%end, ierr) + proc_coords(3) = proc_coords(3) - 1 + end if + +#ifdef MFC_POST_PROCESS + ! Ghost zone at the beginning + if (proc_coords(3) > 0 .and. format == 1) then + offset_z%beg = 2 + else + offset_z%beg = 0 + end if + + ! Ghost zone at the end + if (proc_coords(3) < num_procs_z - 1 .and. format == 1) then + offset_z%end = 2 + else + offset_z%end = 0 + end if +#endif + + ! Beginning and end sub-domain boundary locations + if (parallel_io) then + if (proc_coords(3) < rem_cells) then + start_idx(3) = (p + 1)*proc_coords(3) + else + start_idx(3) = (p + 1)*proc_coords(3) + rem_cells + end if + else +#ifdef MFC_PRE_PROCESS + if (old_grid .neqv. .true.) 
then + dz = (z_domain%end - z_domain%beg)/real(p_glb + 1, wp) + + if (proc_coords(3) < rem_cells) then + z_domain%beg = z_domain%beg + dz*real((p + 1)* & + proc_coords(3)) + z_domain%end = z_domain%end - dz*real((p + 1)* & + (num_procs_z - proc_coords(3) - 1) & + - (num_procs_z - rem_cells)) + else + z_domain%beg = z_domain%beg + dz*real((p + 1)* & + proc_coords(3) + rem_cells) + z_domain%end = z_domain%end - dz*real((p + 1)* & + (num_procs_z - proc_coords(3) - 1)) + end if + end if +#endif + end if + + ! 2D Cartesian Processor Topology + else + + ! Initial estimate of optimal processor topology + num_procs_x = 1 + num_procs_y = num_procs + ierr = -1 + + ! Benchmarking the quality of this initial guess + tmp_num_procs_x = num_procs_x + tmp_num_procs_y = num_procs_y + fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + + ! Optimization of the initial processor topology + do i = 1, num_procs + + if (mod(num_procs, i) == 0 & + .and. & + (m + 1)/i >= num_stcls_min*weno_order) then + + tmp_num_procs_x = i + tmp_num_procs_y = num_procs/i + + if (fct_min >= abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + .and. & + (n + 1)/tmp_num_procs_y & + >= & + num_stcls_min*weno_order) then + + num_procs_x = i + num_procs_y = num_procs/i + fct_min = abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + ierr = 0 + + end if + + end if + + end do + + ! Verifying that a valid decomposition of the computational + ! domain has been established. If not, the simulation exits. + if (proc_rank == 0 .and. ierr == -1) then + call s_mpi_abort('Unsupported combination of values '// & + 'of num_procs, m, n and '// & + 'weno_order. Exiting.') + end if + + ! Creating new communicator using the Cartesian topology + call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, & + num_procs_y/), (/.true., & + .true./), .false., MPI_COMM_CART, & + ierr) + + ! Finding the Cartesian coordinates of the local process + call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, & + proc_coords, ierr) + end if - #:endfor - p_send => buff_send(0) - p_recv => buff_recv(0) - ! Send/Recv -#ifdef MFC_SIMULATION - #:for rdma_mpi in [False, True] - if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then - #:if rdma_mpi - !$acc data attach(p_send, p_recv) - !$acc host_data use_device(p_send, p_recv) - call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") - #:else - call nvtxStartRange("RHS-COMM-DEV2HOST") - !$acc update host(buff_send) - call nvtxEndRange - call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA") - #:endif + ! END: 2D Cartesian Processor Topology - call MPI_SENDRECV( & - p_send, buffer_count, mpi_p, dst_proc, send_tag, & - p_recv, buffer_count, mpi_p, src_proc, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + ! Global Parameters for y-direction - call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA + ! Number of remaining cells + rem_cells = mod(n + 1, num_procs_y) - #:if rdma_mpi - !$acc end host_data - !$acc end data - !$acc wait - #:else - call nvtxStartRange("RHS-COMM-HOST2DEV") - !$acc update device(buff_recv) - call nvtxEndRange - #:endif + ! Optimal number of cells per processor + n = (n + 1)/num_procs_y - 1 + + ! Distributing the remaining cells + do i = 1, rem_cells + if (proc_coords(2) == i - 1) then + n = n + 1; exit + end if + end do + + ! Boundary condition at the beginning + if (proc_coords(2) > 0 .or. (bc_y%beg == BC_PERIODIC .and. 
num_procs_y > 1)) then + proc_coords(2) = proc_coords(2) - 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & + bc_y%beg, ierr) + proc_coords(2) = proc_coords(2) + 1 + end if + + ! Boundary condition at the end + if (proc_coords(2) < num_procs_y - 1 .or. (bc_y%end == BC_PERIODIC .and. num_procs_y > 1)) then + proc_coords(2) = proc_coords(2) + 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & + bc_y%end, ierr) + proc_coords(2) = proc_coords(2) - 1 + end if + +#ifdef MFC_POST_PROCESS + ! Ghost zone at the beginning + if (proc_coords(2) > 0 .and. format == 1) then + offset_y%beg = 2 + else + offset_y%beg = 0 + end if + + ! Ghost zone at the end + if (proc_coords(2) < num_procs_y - 1 .and. format == 1) then + offset_y%end = 2 + else + offset_y%end = 0 end if - #:endfor -#else - call MPI_SENDRECV( & - p_send, buffer_count, mpi_p, dst_proc, send_tag, & - p_recv, buffer_count, mpi_p, src_proc, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) #endif - ! Unpack Received Buffer - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, nVars - r = (i - 1) + nVars* & - (j + buff_size*((k + 1) + (n + 1)*l)) - c_divs_vf(i)%sf(j + unpack_offset, k, l) = buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + ! Beginning and end sub-domain boundary locations + if (parallel_io) then + if (proc_coords(2) < rem_cells) then + start_idx(2) = (n + 1)*proc_coords(2) + else + start_idx(2) = (n + 1)*proc_coords(2) + rem_cells + end if + else +#ifdef MFC_PRE_PROCESS + if (old_grid .neqv. .true.) then + dy = (y_domain%end - y_domain%beg)/real(n_glb + 1, wp) + + if (proc_coords(2) < rem_cells) then + y_domain%beg = y_domain%beg + dy*real((n + 1)* & + proc_coords(2)) + y_domain%end = y_domain%end - dy*real((n + 1)* & + (num_procs_y - proc_coords(2) - 1) & + - (num_procs_y - rem_cells)) + else + y_domain%beg = y_domain%beg + dy*real((n + 1)* & + proc_coords(2) + rem_cells) + y_domain%end = y_domain%end - dy*real((n + 1)* & + (num_procs_y - proc_coords(2) - 1)) + end if + end if #endif - end do - end do - end do - end do + end if - #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - c_divs_vf(i)%sf(j, k + unpack_offset, l) = buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + ! 1D Cartesian Processor Topology + else + + ! Optimal processor topology + num_procs_x = num_procs + + ! Creating new communicator using the Cartesian topology + call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), & + (/.true./), .false., MPI_COMM_CART, & + ierr) + + ! Finding the Cartesian coordinates of the local process + call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, & + proc_coords, ierr) + + end if + + ! Global Parameters for x-direction + + ! Number of remaining cells + rem_cells = mod(m + 1, num_procs_x) + + ! Optimal number of cells per processor + m = (m + 1)/num_procs_x - 1 + + ! 
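    ! Illustrative sketch (hypothetical sizes, not taken from this patch): the
    ! remainder-cell bookkeeping used in every direction of this routine splits
    ! n_glb+1 cells over the ranks of that direction, hands one extra cell to
    ! the first rem ranks, and recovers each rank's global starting cell index
    ! exactly as the start_idx branches below do.
    program cell_split_sketch
        implicit none
        integer :: n_glb, nproc, rem, rank, n_loc, start_idx

        n_glb = 200          ! hypothetical last global cell index (n_glb+1 cells)
        nproc = 7            ! hypothetical number of ranks in this direction

        rem = mod(n_glb + 1, nproc)

        do rank = 0, nproc - 1
            n_loc = (n_glb + 1)/nproc - 1        ! base local last index per rank
            if (rank < rem) n_loc = n_loc + 1    ! first `rem` ranks take one extra cell

            if (rank < rem) then
                start_idx = (n_loc + 1)*rank
            else
                start_idx = (n_loc + 1)*rank + rem
            end if

            print '(A,I3,A,I5,A,I5)', 'rank ', rank, ': cells = ', n_loc + 1, &
                ', first global cell = ', start_idx
        end do
    end program cell_split_sketch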
Distributing the remaining cells + do i = 1, rem_cells + if (proc_coords(1) == i - 1) then + m = m + 1; exit + end if + end do + + ! Boundary condition at the beginning + if (proc_coords(1) > 0 .or. (bc_x%beg == BC_PERIODIC .and. num_procs_x > 1)) then + proc_coords(1) = proc_coords(1) - 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) + proc_coords(1) = proc_coords(1) + 1 + end if + + ! Boundary condition at the end + if (proc_coords(1) < num_procs_x - 1 .or. (bc_x%end == BC_PERIODIC .and. num_procs_x > 1)) then + proc_coords(1) = proc_coords(1) + 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) + proc_coords(1) = proc_coords(1) - 1 + end if + +#ifdef MFC_POST_PROCESS + ! Ghost zone at the beginning + if (proc_coords(1) > 0 .and. format == 1 .and. n > 0) then + offset_x%beg = 2 + else + offset_x%beg = 0 + end if + + ! Ghost zone at the end + if (proc_coords(1) < num_procs_x - 1 .and. format == 1 .and. n > 0) then + offset_x%end = 2 + else + offset_x%end = 0 + end if #endif - end do - end do - end do - end do - #:else - ! Unpacking buffer from bc_z%beg - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - c_divs_vf(i)%sf(j, k, l + unpack_offset) = buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + ! Beginning and end sub-domain boundary locations + if (parallel_io) then + if (proc_coords(1) < rem_cells) then + start_idx(1) = (m + 1)*proc_coords(1) + else + start_idx(1) = (m + 1)*proc_coords(1) + rem_cells + end if + else +#ifdef MFC_PRE_PROCESS + if (old_grid .neqv. .true.) then + dx = (x_domain%end - x_domain%beg)/real(m_glb + 1, wp) + + if (proc_coords(1) < rem_cells) then + x_domain%beg = x_domain%beg + dx*real((m + 1)* & + proc_coords(1)) + x_domain%end = x_domain%end - dx*real((m + 1)* & + (num_procs_x - proc_coords(1) - 1) & + - (num_procs_x - rem_cells)) + else + x_domain%beg = x_domain%beg + dx*real((m + 1)* & + proc_coords(1) + rem_cells) + x_domain%end = x_domain%end - dx*real((m + 1)* & + (num_procs_x - proc_coords(1) - 1)) + end if + end if +#endif + end if #endif - end do - end do - end do - end do - #:endif + end subroutine s_mpi_decompose_computational_domain + + !> The goal of this procedure is to populate the buffers of + !! the grid variables by communicating with the neighboring + !! processors. Note that only the buffers of the cell-width + !! distributions are handled in such a way. This is because + !! the buffers of cell-boundary locations may be calculated + !! directly from those of the cell-width distributions. + !! @param mpi_dir MPI communication coordinate direction + !! @param pbc_loc Processor boundary condition (PBC) location +#ifndef MFC_PRE_PROCESS + subroutine s_mpi_sendrecv_grid_variables_buffers(mpi_dir, pbc_loc) + + integer, intent(in) :: mpi_dir + integer, intent(in) :: pbc_loc + +#ifdef MFC_MPI + + ! MPI Communication in x-direction + if (mpi_dir == 1) then + + if (pbc_loc == -1) then ! PBC at the beginning + + if (bc_x%end >= 0) then ! PBC at the beginning and end + + ! 
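    ! Illustrative sketch (hypothetical sizes; MPI_DOUBLE_PRECISION stands in
    ! for the code's working-precision type mpi_p): the SENDRECV calls below
    ! exchange buff_size cell widths with the neighbour rank stored in
    ! bc_x%beg / bc_x%end. A minimal 1-D analogue on a periodic Cartesian
    ! communicator, using the same tag convention (0 toward beg, 1 toward end):
    program halo_exchange_sketch
        use mpi
        implicit none
        integer, parameter :: nloc = 8, buf = 3
        double precision :: dx(-buf:nloc - 1 + buf)
        integer :: comm_cart, left, right, rank, nproc, ierr

        call MPI_INIT(ierr)
        call MPI_COMM_SIZE(MPI_COMM_WORLD, nproc, ierr)

        ! Periodic 1-D Cartesian topology; MPI_CART_SHIFT returns the neighbour
        ! ranks (MPI_PROC_NULL if a side has no neighbour).
        call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/nproc/), (/.true./), &
                             .false., comm_cart, ierr)
        call MPI_COMM_RANK(comm_cart, rank, ierr)
        call MPI_CART_SHIFT(comm_cart, 0, 1, left, right, ierr)

        dx = real(rank, kind(dx))  ! dummy interior data

        ! Last `buf` interior widths go to the right neighbour; the left ghost
        ! widths are filled, like the bc_x%end/bc_x%beg exchange below.
        call MPI_SENDRECV(dx(nloc - buf), buf, MPI_DOUBLE_PRECISION, right, 0, &
                          dx(-buf), buf, MPI_DOUBLE_PRECISION, left, 0, &
                          comm_cart, MPI_STATUS_IGNORE, ierr)

        ! Opposite sweep: first `buf` interior widths go left, right ghosts filled.
        call MPI_SENDRECV(dx(0), buf, MPI_DOUBLE_PRECISION, left, 1, &
                          dx(nloc), buf, MPI_DOUBLE_PRECISION, right, 1, &
                          comm_cart, MPI_STATUS_IGNORE, ierr)

        call MPI_FINALIZE(ierr)
    end program halo_exchange_sketch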
Send/receive buffer to/from bc_x%end/bc_x%beg + call MPI_SENDRECV( & + dx(m - buff_size + 1), buff_size, & + mpi_p, bc_x%end, 0, & + dx(-buff_size), buff_size, & + mpi_p, bc_x%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the beginning only + + ! Send/receive buffer to/from bc_x%beg/bc_x%beg + call MPI_SENDRECV( & + dx(0), buff_size, & + mpi_p, bc_x%beg, 1, & + dx(-buff_size), buff_size, & + mpi_p, bc_x%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + else ! PBC at the end + + if (bc_x%beg >= 0) then ! PBC at the end and beginning + + ! Send/receive buffer to/from bc_x%beg/bc_x%end + call MPI_SENDRECV( & + dx(0), buff_size, & + mpi_p, bc_x%beg, 1, & + dx(m + 1), buff_size, & + mpi_p, bc_x%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the end only + + ! Send/receive buffer to/from bc_x%end/bc_x%end + call MPI_SENDRECV( & + dx(m - buff_size + 1), buff_size, & + mpi_p, bc_x%end, 0, & + dx(m + 1), buff_size, & + mpi_p, bc_x%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + end if - #:endfor + ! END: MPI Communication in x-direction + + ! MPI Communication in y-direction + elseif (mpi_dir == 2) then + + if (pbc_loc == -1) then ! PBC at the beginning + + if (bc_y%end >= 0) then ! PBC at the beginning and end + + ! Send/receive buffer to/from bc_y%end/bc_y%beg + call MPI_SENDRECV( & + dy(n - buff_size + 1), buff_size, & + mpi_p, bc_y%end, 0, & + dy(-buff_size), buff_size, & + mpi_p, bc_y%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the beginning only + + ! Send/receive buffer to/from bc_y%beg/bc_y%beg + call MPI_SENDRECV( & + dy(0), buff_size, & + mpi_p, bc_y%beg, 1, & + dy(-buff_size), buff_size, & + mpi_p, bc_y%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + else ! PBC at the end + + if (bc_y%beg >= 0) then ! PBC at the end and beginning + + ! Send/receive buffer to/from bc_y%beg/bc_y%end + call MPI_SENDRECV( & + dy(0), buff_size, & + mpi_p, bc_y%beg, 1, & + dy(n + 1), buff_size, & + mpi_p, bc_y%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the end only + + ! Send/receive buffer to/from bc_y%end/bc_y%end + call MPI_SENDRECV( & + dy(n - buff_size + 1), buff_size, & + mpi_p, bc_y%end, 0, & + dy(n + 1), buff_size, & + mpi_p, bc_y%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + end if + ! END: MPI Communication in y-direction + + ! MPI Communication in z-direction + else + + if (pbc_loc == -1) then ! PBC at the beginning + + if (bc_z%end >= 0) then ! PBC at the beginning and end + + ! Send/receive buffer to/from bc_z%end/bc_z%beg + call MPI_SENDRECV( & + dz(p - buff_size + 1), buff_size, & + mpi_p, bc_z%end, 0, & + dz(-buff_size), buff_size, & + mpi_p, bc_z%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the beginning only + + ! Send/receive buffer to/from bc_z%beg/bc_z%beg + call MPI_SENDRECV( & + dz(0), buff_size, & + mpi_p, bc_z%beg, 1, & + dz(-buff_size), buff_size, & + mpi_p, bc_z%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + else ! PBC at the end + + if (bc_z%beg >= 0) then ! PBC at the end and beginning + + ! Send/receive buffer to/from bc_z%beg/bc_z%end + call MPI_SENDRECV( & + dz(0), buff_size, & + mpi_p, bc_z%beg, 1, & + dz(p + 1), buff_size, & + mpi_p, bc_z%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the end only + + ! 
Send/receive buffer to/from bc_z%end/bc_z%end + call MPI_SENDRECV( & + dz(p - buff_size + 1), buff_size, & + mpi_p, bc_z%end, 0, & + dz(p + 1), buff_size, & + mpi_p, bc_z%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + end if + + end if + ! END: MPI Communication in z-direction #endif - end subroutine s_mpi_sendrecv_capilary_variables_buffers + end subroutine s_mpi_sendrecv_grid_variables_buffers +#endif !> Module deallocation and/or disassociation procedures impure subroutine s_finalize_mpi_common_module diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 6b506b77cd..2d3eb6a3a5 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -1138,6 +1138,10 @@ contains if (cont_damage) qK_prim_vf(damage_idx)%sf(j, k, l) = qK_cons_vf(damage_idx)%sf(j, k, l) +#ifdef MFC_POST_PROCESS + if (bubbles_lagrange) qK_prim_vf(beta_idx)%sf(j, k, l) = qK_cons_vf(beta_idx)%sf(j, k, l) +#endif + end do end do end do diff --git a/src/post_process/m_data_input.f90 b/src/post_process/m_data_input.f90 index 45a2baa728..4695e27052 100644 --- a/src/post_process/m_data_input.f90 +++ b/src/post_process/m_data_input.f90 @@ -17,8 +17,12 @@ module m_data_input use m_mpi_proxy !< Message passing interface (MPI) module proxy + use m_mpi_common + use m_compile_specific + use m_boundary_common + use m_helper implicit none @@ -27,8 +31,6 @@ module m_data_input s_read_data_files, & s_read_serial_data_files, & s_read_parallel_data_files, & - s_populate_grid_variables_buffer_regions, & - s_populate_conservative_variables_buffer_regions, & s_finalize_data_input_module abstract interface @@ -51,8 +53,8 @@ end subroutine s_read_abstract_data_files type(scalar_field), allocatable, dimension(:), public :: q_prim_vf !< !! Primitive variables - type(scalar_field), allocatable, dimension(:), public :: q_particle !< - !! Lagrangian solver (particle void fraction) + type(integer_field), allocatable, dimension(:, :), public :: bc_type !< + !! Boundary condition identifiers type(scalar_field), public :: q_T_sf !< !! Temperature field @@ -125,6 +127,11 @@ impure subroutine s_read_serial_data_files(t_step) ' is missing. Exiting.') end if + if (bc_io) then + call s_read_serial_boundary_condition_files(t_step_dir, bc_type) + else + call s_assign_default_bc_type(bc_type) + end if ! Reading the Grid Data File for the x-direction ! Checking whether x_cb.dat exists @@ -238,29 +245,6 @@ impure subroutine s_read_serial_data_files(t_step) end if end if - if (bubbles_lagrange) then !Lagrangian solver - - ! Checking whether the data file associated with the variable - ! position of currently manipulated conservative variable exists - write (file_num, '(I0)') sys_size + 1 - file_loc = trim(t_step_dir)//'/q_cons_vf'// & - trim(file_num)//'.dat' - inquire (FILE=trim(file_loc), EXIST=file_check) - - ! Reading the data file if it exists, exiting otherwise - if (file_check) then - open (1, FILE=trim(file_loc), FORM='unformatted', & - STATUS='old', ACTION='read') - read (1) q_particle(1)%sf(0:m, 0:n, 0:p) - close (1) - else - print '(A)', 'File q_cons_vf'//trim(file_num)// & - '.dat is missing in '//trim(t_step_dir)// & - '. Exiting.' 
- call s_mpi_abort() - end if - end if - end subroutine s_read_serial_data_files !> This subroutine is called at each time-step that has to @@ -292,14 +276,6 @@ impure subroutine s_read_parallel_data_files(t_step) integer :: i - integer :: alt_sys !Altered sys_size for lagrangian solver - - if (bubbles_lagrange) then - alt_sys = sys_size + 1 - else - alt_sys = sys_size - end if - allocate (x_cb_glb(-1:m_glb)) allocate (y_cb_glb(-1:n_glb)) allocate (z_cb_glb(-1:p_glb)) @@ -455,8 +431,6 @@ impure subroutine s_read_parallel_data_files(t_step) ! Initialize MPI data I/O if (ib) then call s_initialize_mpi_data(q_cons_vf, ib_markers) - elseif (bubbles_lagrange) then - call s_initialize_mpi_data(q_cons_vf, beta=q_particle(1)) else call s_initialize_mpi_data(q_cons_vf) end if @@ -471,46 +445,20 @@ impure subroutine s_read_parallel_data_files(t_step) WP_MOK = int(8._wp, MPI_OFFSET_KIND) MOK = int(1._wp, MPI_OFFSET_KIND) str_MOK = int(name_len, MPI_OFFSET_KIND) - NVARS_MOK = int(alt_sys, MPI_OFFSET_KIND) + NVARS_MOK = int(sys_size, MPI_OFFSET_KIND) ! Read the data for each variable - if (bubbles_euler .or. elasticity) then - do i = 1, sys_size - var_MOK = int(i, MPI_OFFSET_KIND) - - ! Initial displacement to skip at beginning of file - disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1) - - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(i), & - 'native', mpi_info_int, ierr) - call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA%var(i)%sf, data_size, & - mpi_p, status, ierr) - end do - else - do i = 1, sys_size - var_MOK = int(i, MPI_OFFSET_KIND) - - ! Initial displacement to skip at beginning of file - disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1) - - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(i), & - 'native', mpi_info_int, ierr) - call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA%var(i)%sf, data_size, & - mpi_p, status, ierr) - end do - end if - - if (bubbles_lagrange) then !Lagrangian solver - var_MOK = int(sys_size + 1, MPI_OFFSET_KIND) + do i = 1, sys_size + var_MOK = int(i, MPI_OFFSET_KIND) ! Initial displacement to skip at beginning of file disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1) - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(sys_size + 1), & + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(i), & 'native', mpi_info_int, ierr) - call MPI_FILE_READ(ifile, MPI_IO_DATA%var(sys_size + 1)%sf, data_size, & - mpi_p, status, ierr) - end if + call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA%var(i)%sf, data_size, & + mpi_p, status, ierr) + end do call s_mpi_barrier() @@ -544,763 +492,15 @@ impure subroutine s_read_parallel_data_files(t_step) deallocate (x_cb_glb, y_cb_glb, z_cb_glb) -#endif - - end subroutine s_read_parallel_data_files - - !> The following subroutine populates the buffer regions of - !! the cell-width spacings, the cell-boundary locations and - !! the cell-center locations. Note that the buffer regions - !! of the last two variables should be interpreted slightly - !! differently than usual. They are really ghost zones that - !! are used in aiding the multidimensional visualization of - !! Silo database files, in VisIt, when processor boundary - !! conditions are present. - impure subroutine s_populate_grid_variables_buffer_regions - - integer :: i !< Generic loop iterator - - ! Populating Buffer Regions in the x-direction - - ! Ghost-cell extrapolation BC at the beginning - if (bc_x%beg <= BC_GHOST_EXTRAP) then - - do i = 1, buff_size - dx(-i) = dx(0) - end do - - ! 
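    ! Illustrative sketch (hypothetical sizes and BC selector): the block being
    ! removed here fills the cell-width ghost cells from the interior according
    ! to the face's boundary-condition type. The three local fill rules it uses
    ! are, in condensed form:
    program dx_ghost_fill_sketch
        implicit none
        integer, parameter :: m = 9, buf = 3
        integer, parameter :: EXTRAP = 0, REFLECT = 1, PERIODIC = 2
        double precision :: dx(-buf:m + buf)
        integer :: i, bc_beg

        dx(0:m) = 1.0d0      ! dummy interior cell widths
        bc_beg = REFLECT     ! hypothetical BC selector at the beg face

        do i = 1, buf
            select case (bc_beg)
            case (EXTRAP)    ! copy the first interior width outward
                dx(-i) = dx(0)
            case (REFLECT)   ! mirror the widths about the face
                dx(-i) = dx(i - 1)
            case (PERIODIC)  ! wrap around from the opposite end
                dx(-i) = dx((m + 1) - i)
            end select
        end do
    end program dx_ghost_fill_sketch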
Symmetry BC at the beginning - elseif (bc_x%beg == BC_REFLECTIVE) then - - do i = 1, buff_size - dx(-i) = dx(i - 1) - end do - - ! Periodic BC at the beginning - elseif (bc_x%beg == BC_PERIODIC) then - - do i = 1, buff_size - dx(-i) = dx((m + 1) - i) - end do - - ! Processor BC at the beginning - else - - call s_mpi_sendrecv_grid_vars_buffer_regions('beg', 'x') - - end if - - do i = 1, offset_x%beg - x_cb(-1 - i) = x_cb(-i) - dx(-i) - end do - - do i = 1, buff_size - x_cc(-i) = x_cc(1 - i) - (dx(1 - i) + dx(-i))/2._wp - end do - - ! Ghost-cell extrapolation BC at the end - if (bc_x%end <= BC_GHOST_EXTRAP) then - - do i = 1, buff_size - dx(m + i) = dx(m) - end do - - ! Symmetry BC at the end - elseif (bc_x%end == BC_REFLECTIVE) then - - do i = 1, buff_size - dx(m + i) = dx((m + 1) - i) - end do - - ! Periodic BC at the end - elseif (bc_x%end == BC_PERIODIC) then - - do i = 1, buff_size - dx(m + i) = dx(i - 1) - end do - - ! Processor BC at the end - else - - call s_mpi_sendrecv_grid_vars_buffer_regions('end', 'x') - - end if - - do i = 1, offset_x%end - x_cb(m + i) = x_cb(m + (i - 1)) + dx(m + i) - end do - - do i = 1, buff_size - x_cc(m + i) = x_cc(m + (i - 1)) + (dx(m + (i - 1)) + dx(m + i))/2._wp - end do - - ! END: Populating Buffer Regions in the x-direction - - ! Populating Buffer Regions in the y-direction - - if (n > 0) then - - ! Ghost-cell extrapolation BC at the beginning - if (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_AXIS) then - - do i = 1, buff_size - dy(-i) = dy(0) - end do - - ! Symmetry BC at the beginning - elseif (bc_y%beg == BC_REFLECTIVE .or. bc_y%beg == BC_AXIS) then - - do i = 1, buff_size - dy(-i) = dy(i - 1) - end do - - ! Periodic BC at the beginning - elseif (bc_y%beg == BC_PERIODIC) then - - do i = 1, buff_size - dy(-i) = dy((n + 1) - i) - end do - - ! Processor BC at the beginning - else - - call s_mpi_sendrecv_grid_vars_buffer_regions('beg', 'y') - - end if - - do i = 1, offset_y%beg - y_cb(-1 - i) = y_cb(-i) - dy(-i) - end do - - do i = 1, buff_size - y_cc(-i) = y_cc(1 - i) - (dy(1 - i) + dy(-i))/2._wp - end do - - ! Ghost-cell extrapolation BC at the end - if (bc_y%end <= BC_GHOST_EXTRAP) then - - do i = 1, buff_size - dy(n + i) = dy(n) - end do - - ! Symmetry BC at the end - elseif (bc_y%end == BC_REFLECTIVE) then - - do i = 1, buff_size - dy(n + i) = dy((n + 1) - i) - end do - - ! Periodic BC at the end - elseif (bc_y%end == BC_PERIODIC) then - - do i = 1, buff_size - dy(n + i) = dy(i - 1) - end do - - ! Processor BC at the end - else - - call s_mpi_sendrecv_grid_vars_buffer_regions('end', 'y') - - end if - - do i = 1, offset_y%end - y_cb(n + i) = y_cb(n + (i - 1)) + dy(n + i) - end do - - do i = 1, buff_size - y_cc(n + i) = y_cc(n + (i - 1)) + (dy(n + (i - 1)) + dy(n + i))/2._wp - end do - - ! END: Populating Buffer Regions in the y-direction - - ! Populating Buffer Regions in the z-direction - - if (p > 0) then - - ! Ghost-cell extrapolation BC at the beginning - if (bc_z%beg <= BC_GHOST_EXTRAP) then - - do i = 1, buff_size - dz(-i) = dz(0) - end do - - ! Symmetry BC at the beginning - elseif (bc_z%beg == BC_REFLECTIVE) then - - do i = 1, buff_size - dz(-i) = dz(i - 1) - end do - - ! Periodic BC at the beginning - elseif (bc_z%beg == BC_PERIODIC) then - - do i = 1, buff_size - dz(-i) = dz((p + 1) - i) - end do - - ! 
Processor BC at the beginning - else - - call s_mpi_sendrecv_grid_vars_buffer_regions('beg', 'z') - - end if - - do i = 1, offset_z%beg - z_cb(-1 - i) = z_cb(-i) - dz(-i) - end do - - do i = 1, buff_size - z_cc(-i) = z_cc(1 - i) - (dz(1 - i) + dz(-i))/2._wp - end do - - ! Ghost-cell extrapolation BC at the end - if (bc_z%end <= BC_GHOST_EXTRAP) then - - do i = 1, buff_size - dz(p + i) = dz(p) - end do - - ! Symmetry BC at the end - elseif (bc_z%end == BC_REFLECTIVE) then - - do i = 1, buff_size - dz(p + i) = dz((p + 1) - i) - end do - - ! Periodic BC at the end - elseif (bc_z%end == BC_PERIODIC) then - - do i = 1, buff_size - dz(p + i) = dz(i - 1) - end do - - ! Processor BC at the end - else - - call s_mpi_sendrecv_grid_vars_buffer_regions('end', 'z') - - end if - - do i = 1, offset_z%end - z_cb(p + i) = z_cb(p + (i - 1)) + dz(p + i) - end do - - do i = 1, buff_size - z_cc(p + i) = z_cc(p + (i - 1)) + (dz(p + (i - 1)) + dz(p + i))/2._wp - end do - - end if - - end if - - ! END: Populating Buffer Regions in the z-direction - - end subroutine s_populate_grid_variables_buffer_regions - - !> The purpose of this procedure is to populate the buffers - !! of the cell-average conservative variables, depending on - !! the boundary conditions. - impure subroutine s_populate_conservative_variables_buffer_regions(q_particle) - - type(scalar_field), intent(inout), optional :: q_particle - - integer :: i, j, k !< Generic loop iterators - - ! Populating Buffer Regions in the x-direction - - ! Ghost-cell extrapolation BC at the beginning - if (bc_x%beg <= BC_GHOST_EXTRAP) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(-j, 0:n, 0:p) = & - q_particle%sf(0, 0:n, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(-j, 0:n, 0:p) = q_cons_vf(i)%sf(0, 0:n, 0:p) - end do - end if - end do - - ! Symmetry BC at the beginning - elseif (bc_x%beg == BC_REFLECTIVE) then - - do j = 1, buff_size - - if (present(q_particle)) then - q_particle%sf(-j, 0:n, 0:p) = & - q_particle%sf(j - 1, 0:n, 0:p) - else - ! Density or partial densities - do i = 1, cont_idx%end - q_cons_vf(i)%sf(-j, 0:n, 0:p) = & - q_cons_vf(i)%sf(j - 1, 0:n, 0:p) - end do - - ! x-component of momentum - q_cons_vf(mom_idx%beg)%sf(-j, 0:n, 0:p) = & - -q_cons_vf(mom_idx%beg)%sf(j - 1, 0:n, 0:p) - - ! Remaining momentum component(s), if any, as well as the - ! energy and the variable(s) from advection equation(s) - do i = mom_idx%beg + 1, sys_size - q_cons_vf(i)%sf(-j, 0:n, 0:p) = & - q_cons_vf(i)%sf(j - 1, 0:n, 0:p) - end do - end if - - end do - - ! Periodic BC at the beginning - elseif (bc_x%beg == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(-j, 0:n, 0:p) = & - q_particle%sf((m + 1) - j, 0:n, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(-j, 0:n, 0:p) = & - q_cons_vf(i)%sf((m + 1) - j, 0:n, 0:p) - end do - end if - end do - - ! Processor BC at the beginning - else - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'x', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'x') - end if - - end if - - ! Ghost-cell extrapolation BC at the end - if (bc_x%end <= BC_GHOST_EXTRAP) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(m + j, 0:n, 0:p) = & - q_particle%sf(m, 0:n, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(m + j, 0:n, 0:p) = & - q_cons_vf(i)%sf(m, 0:n, 0:p) - end do - end if - end do - - ! 
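    ! Illustrative sketch (hypothetical 1-D arrays standing in for q_cons_vf):
    ! the reflective branches in this removed routine mirror every field about
    ! the face and flip the sign of the face-normal momentum only, e.g. for a
    ! density/normal-momentum pair at the beg face:
    program reflective_fill_sketch
        implicit none
        integer, parameter :: m = 9, buf = 3
        double precision :: rho(-buf:m + buf), mom_x(-buf:m + buf)
        integer :: j

        rho(0:m) = 1.0d0
        mom_x(0:m) = 0.5d0

        do j = 1, buf
            rho(-j) = rho(j - 1)        ! mirrored copy
            mom_x(-j) = -mom_x(j - 1)   ! face-normal momentum changes sign
        end do
    end program reflective_fill_sketch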
Symmetry BC at the end - elseif (bc_x%end == BC_REFLECTIVE) then - - do j = 1, buff_size - - if (present(q_particle)) then - q_particle%sf(m + j, 0:n, 0:p) = & - q_particle%sf((m + 1) - j, 0:n, 0:p) - else - - ! Density or partial densities - do i = 1, cont_idx%end - q_cons_vf(i)%sf(m + j, 0:n, 0:p) = & - q_cons_vf(i)%sf((m + 1) - j, 0:n, 0:p) - end do - - ! x-component of momentum - q_cons_vf(mom_idx%beg)%sf(m + j, 0:n, 0:p) = & - -q_cons_vf(mom_idx%beg)%sf((m + 1) - j, 0:n, 0:p) - - ! Remaining momentum component(s), if any, as well as the - ! energy and the variable(s) from advection equation(s) - do i = mom_idx%beg + 1, sys_size - q_cons_vf(i)%sf(m + j, 0:n, 0:p) = & - q_cons_vf(i)%sf((m + 1) - j, 0:n, 0:p) - end do - end if - - end do - - ! Perodic BC at the end - elseif (bc_x%end == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(m + j, 0:n, 0:p) = & - q_particle%sf(j - 1, 0:n, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(m + j, 0:n, 0:p) = & - q_cons_vf(i)%sf(j - 1, 0:n, 0:p) - end do - end if - end do - - ! Processor BC at the end + if (bc_io) then + call s_read_parallel_boundary_condition_files(bc_type) else - - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'x', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'x') - end if - - end if - - ! END: Populating Buffer Regions in the x-direction - - ! Populating Buffer Regions in the y-direction - - if (n > 0) then - - ! Ghost-cell extrapolation BC at the beginning - if (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_AXIS) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, -j, 0:p) = q_particle%sf(:, 0, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, -j, 0:p) = q_cons_vf(i)%sf(:, 0, 0:p) - end do - end if - end do - - ! Axis BC at the beginning - elseif (bc_y%beg == BC_AXIS) then - - do j = 1, buff_size - do k = 0, p - if (z_cc(k) < pi) then - if (present(q_particle)) then - q_particle%sf(:, -j, k) = & - q_particle%sf(:, j - 1, k + ((p + 1)/2)) - else - do i = 1, mom_idx%beg - q_cons_vf(i)%sf(:, -j, k) = & - q_cons_vf(i)%sf(:, j - 1, k + ((p + 1)/2)) - end do - - q_cons_vf(mom_idx%beg + 1)%sf(:, -j, k) = & - -q_cons_vf(mom_idx%beg + 1)%sf(:, j - 1, k + ((p + 1)/2)) - - q_cons_vf(mom_idx%end)%sf(:, -j, k) = & - -q_cons_vf(mom_idx%end)%sf(:, j - 1, k + ((p + 1)/2)) - - do i = E_idx, sys_size - q_cons_vf(i)%sf(:, -j, k) = & - q_cons_vf(i)%sf(:, j - 1, k + ((p + 1)/2)) - end do - end if - else - if (present(q_particle)) then - q_particle%sf(:, -j, k) = & - q_particle%sf(:, j - 1, k - ((p + 1)/2)) - else - do i = 1, mom_idx%beg - q_cons_vf(i)%sf(:, -j, k) = & - q_cons_vf(i)%sf(:, j - 1, k - ((p + 1)/2)) - end do - - q_cons_vf(mom_idx%beg + 1)%sf(:, -j, k) = & - -q_cons_vf(mom_idx%beg + 1)%sf(:, j - 1, k - ((p + 1)/2)) - - q_cons_vf(mom_idx%end)%sf(:, -j, k) = & - -q_cons_vf(mom_idx%end)%sf(:, j - 1, k - ((p + 1)/2)) - - do i = E_idx, sys_size - q_cons_vf(i)%sf(:, -j, k) = & - q_cons_vf(i)%sf(:, j - 1, k - ((p + 1)/2)) - end do - end if - end if - end do - end do - - ! Symmetry BC at the beginning - elseif (bc_y%beg == BC_REFLECTIVE) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, -j, 0:p) = & - q_particle%sf(:, j - 1, 0:p) - else - ! Density or partial densities and x-momentum component - do i = 1, mom_idx%beg - q_cons_vf(i)%sf(:, -j, 0:p) = & - q_cons_vf(i)%sf(:, j - 1, 0:p) - end do - - ! 
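    ! Illustrative sketch (hypothetical size; assumes p+1 uniformly spaced
    ! azimuthal cells covering 2*pi, so the z_cc(k) < pi test above reduces to
    ! k < (p+1)/2): the BC_AXIS branch pairs each azimuthal cell with the cell
    ! half a revolution away and flips the radial and azimuthal momentum signs.
    ! The index pairing alone looks like:
    program axis_pairing_sketch
        implicit none
        integer, parameter :: p = 7          ! hypothetical last azimuthal index
        integer :: k, k_opp

        do k = 0, p
            if (k < (p + 1)/2) then
                k_opp = k + (p + 1)/2
            else
                k_opp = k - (p + 1)/2
            end if
            print '(A,I2,A,I2)', 'k = ', k, '  pairs with k_opp = ', k_opp
        end do
    end program axis_pairing_sketch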
y-component of momentum - q_cons_vf(mom_idx%beg + 1)%sf(:, -j, 0:p) = & - -q_cons_vf(mom_idx%beg + 1)%sf(:, j - 1, 0:p) - - ! Remaining z-momentum component, if any, as well as the - ! energy and variable(s) from advection equation(s) - do i = mom_idx%beg + 2, sys_size - q_cons_vf(i)%sf(:, -j, 0:p) = & - q_cons_vf(i)%sf(:, j - 1, 0:p) - end do - end if - - end do - - ! Periodic BC at the beginning - elseif (bc_y%beg == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, -j, 0:p) = & - q_particle%sf(:, (n + 1) - j, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, -j, 0:p) = & - q_cons_vf(i)%sf(:, (n + 1) - j, 0:p) - end do - end if - end do - - ! Processor BC at the beginning - else - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'y', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'y') - end if - - end if - - ! Ghost-cell extrapolation BC at the end - if (bc_y%end <= BC_GHOST_EXTRAP) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, n + j, 0:p) = & - q_particle%sf(:, n, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, n + j, 0:p) = & - q_cons_vf(i)%sf(:, n, 0:p) - end do - end if - end do - - ! Symmetry BC at the end - elseif (bc_y%end == BC_REFLECTIVE) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, n + j, 0:p) = & - q_particle%sf(:, (n + 1) - j, 0:p) - else - ! Density or partial densities and x-momentum component - do i = 1, mom_idx%beg - q_cons_vf(i)%sf(:, n + j, 0:p) = & - q_cons_vf(i)%sf(:, (n + 1) - j, 0:p) - end do - - ! y-component of momentum - q_cons_vf(mom_idx%beg + 1)%sf(:, n + j, 0:p) = & - -q_cons_vf(mom_idx%beg + 1)%sf(:, (n + 1) - j, 0:p) - - ! Remaining z-momentum component, if any, as well as the - ! energy and variable(s) from advection equation(s) - do i = mom_idx%beg + 2, sys_size - q_cons_vf(i)%sf(:, n + j, 0:p) = & - q_cons_vf(i)%sf(:, (n + 1) - j, 0:p) - end do - end if - - end do - - ! Perodic BC at the end - elseif (bc_y%end == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, n + j, 0:p) = & - q_particle%sf(:, j - 1, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, n + j, 0:p) = & - q_cons_vf(i)%sf(:, j - 1, 0:p) - end do - end if - end do - - ! Processor BC at the end - else - - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'y', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'y') - end if - - end if - - ! END: Populating Buffer Regions in the y-direction - - ! Populating Buffer Regions in the z-direction - - if (p > 0) then - - ! Ghost-cell extrapolation BC at the beginning - if (bc_z%beg <= BC_GHOST_EXTRAP) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, -j) = q_particle%sf(:, :, 0) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, :, -j) = q_cons_vf(i)%sf(:, :, 0) - end do - end if - end do - - ! Symmetry BC at the beginning - elseif (bc_z%beg == BC_REFLECTIVE) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, -j) = & - q_particle%sf(:, :, j - 1) - else - ! Density or the partial densities and the momentum - ! components in x- and y-directions - do i = 1, mom_idx%beg + 1 - q_cons_vf(i)%sf(:, :, -j) = & - q_cons_vf(i)%sf(:, :, j - 1) - end do - - ! 
z-component of momentum - q_cons_vf(mom_idx%end)%sf(:, :, -j) = & - -q_cons_vf(mom_idx%end)%sf(:, :, j - 1) - - ! Energy and advection equation(s) variable(s) - do i = E_idx, sys_size - q_cons_vf(i)%sf(:, :, -j) = & - q_cons_vf(i)%sf(:, :, j - 1) - end do - end if - - end do - - ! Periodic BC at the beginning - elseif (bc_z%beg == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, -j) = & - q_particle%sf(:, :, (p + 1) - j) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, :, -j) = & - q_cons_vf(i)%sf(:, :, (p + 1) - j) - end do - end if - end do - - ! Processor BC at the beginning - else - - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'z', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'z') - end if - - end if - - ! Ghost-cell extrapolation BC at the end - if (bc_z%end <= BC_GHOST_EXTRAP) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, p + j) = & - q_particle%sf(:, :, p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, :, p + j) = & - q_cons_vf(i)%sf(:, :, p) - end do - end if - end do - - ! Symmetry BC at the end - elseif (bc_z%end == BC_REFLECTIVE) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, p + j) = & - q_particle%sf(:, :, (p + 1) - j) - else - ! Density or the partial densities and the momentum - ! components in x- and y-directions - do i = 1, mom_idx%beg + 1 - q_cons_vf(i)%sf(:, :, p + j) = & - q_cons_vf(i)%sf(:, :, (p + 1) - j) - end do - - ! z-component of momentum - q_cons_vf(mom_idx%end)%sf(:, :, p + j) = & - -q_cons_vf(mom_idx%end)%sf(:, :, (p + 1) - j) - - ! Energy and advection equation(s) variable(s) - do i = E_idx, sys_size - q_cons_vf(i)%sf(:, :, p + j) = & - q_cons_vf(i)%sf(:, :, (p + 1) - j) - end do - end if - - end do - - ! Perodic BC at the end - elseif (bc_z%end == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, p + j) = & - q_particle%sf(:, :, j - 1) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, :, p + j) = & - q_cons_vf(i)%sf(:, :, j - 1) - end do - end if - end do - - ! Processor BC at the end - else - - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'z', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'z') - end if - - end if - - end if - + call s_assign_default_bc_type(bc_type) end if - ! END: Populating Buffer Regions in the z-direction +#endif - end subroutine s_populate_conservative_variables_buffer_regions + end subroutine s_read_parallel_data_files !> Computation of parameters, allocation procedures, and/or !! any other tasks needed to properly setup the module @@ -1313,7 +513,6 @@ impure subroutine s_initialize_data_input_module ! the simulation allocate (q_cons_vf(1:sys_size)) allocate (q_prim_vf(1:sys_size)) - if (bubbles_lagrange) allocate (q_particle(1)) ! Allocating the parts of the conservative and primitive variables ! 
that do require the direct knowledge of the dimensionality of the @@ -1340,12 +539,6 @@ impure subroutine s_initialize_data_input_module -buff_size:p + buff_size)) end if - if (bubbles_lagrange) then - allocate (q_particle(1)%sf(-buff_size:m + buff_size, & - -buff_size:n + buff_size, & - -buff_size:p + buff_size)) - end if - if (chemistry) then allocate (q_T_sf%sf(-buff_size:m + buff_size, & -buff_size:n + buff_size, & @@ -1370,12 +563,6 @@ impure subroutine s_initialize_data_input_module 0:0)) end if - if (bubbles_lagrange) then - allocate (q_particle(1)%sf(-buff_size:m + buff_size, & - -buff_size:n + buff_size, & - 0:0)) - end if - if (chemistry) then allocate (q_T_sf%sf(-buff_size:m + buff_size, & -buff_size:n + buff_size, & @@ -1399,16 +586,26 @@ impure subroutine s_initialize_data_input_module allocate (ib_markers%sf(-buff_size:m + buff_size, 0:0, 0:0)) end if - if (bubbles_lagrange) then - allocate (q_particle(1)%sf(-buff_size:m + buff_size, 0:0, 0:0)) - end if - if (chemistry) then allocate (q_T_sf%sf(-buff_size:m + buff_size, 0:0, 0:0)) end if end if + ! Allocating arrays to store the bc types + allocate (bc_type(1:num_dims, -1:1)) + + allocate (bc_type(1, -1)%sf(0:0, 0:n, 0:p)) + allocate (bc_type(1, 1)%sf(0:0, 0:n, 0:p)) + if (n > 0) then + allocate (bc_type(2, -1)%sf(-buff_size:m + buff_size, 0:0, 0:p)) + allocate (bc_type(2, 1)%sf(-buff_size:m + buff_size, 0:0, 0:p)) + if (p > 0) then + allocate (bc_type(3, -1)%sf(-buff_size:m + buff_size, -buff_size:n + buff_size, 0:0)) + allocate (bc_type(3, 1)%sf(-buff_size:m + buff_size, -buff_size:n + buff_size, 0:0)) + end if + end if + if (parallel_io .neqv. .true.) then s_read_data_files => s_read_serial_data_files else @@ -1435,15 +632,20 @@ impure subroutine s_finalize_data_input_module deallocate (ib_markers%sf) end if - if (bubbles_lagrange) then - deallocate (q_particle(1)%sf) - deallocate (q_particle) - end if - if (chemistry) then deallocate (q_T_sf%sf) end if + deallocate (bc_type(1, -1)%sf, bc_type(1, 1)%sf) + if (n > 0) then + deallocate (bc_type(2, -1)%sf, bc_type(2, 1)%sf) + if (p > 0) then + deallocate (bc_type(3, -1)%sf, bc_type(3, 1)%sf) + end if + end if + + deallocate (bc_type) + s_read_data_files => null() end subroutine s_finalize_data_input_module diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 2c93cca230..0f1c764670 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -124,6 +124,7 @@ module m_global_parameters type(int_bounds_info) :: mom_idx !< Indexes of first & last momentum eqns. integer :: E_idx !< Index of energy equation integer :: n_idx !< Index of number density + integer :: beta_idx !< Index of lagrange bubbles beta type(int_bounds_info) :: adv_idx !< Indexes of first & last advection eqns. type(int_bounds_info) :: internalEnergies_idx !< Indexes of first & last internal energy eqns. type(bub_bounds_info) :: bub_idx !< Indexes of first & last bubble variable eqns. @@ -147,6 +148,8 @@ module m_global_parameters ! Stands for "InDices With BUFFer". type(int_bounds_info) :: idwbuff(1:3) + integer :: num_bc_patches + logical :: bc_io !> @name Boundary conditions in the x-, y- and z-coordinate directions !> @{ type(int_bounds_info) :: bc_x, bc_y, bc_z @@ -370,6 +373,8 @@ contains bc_x%beg = dflt_int; bc_x%end = dflt_int bc_y%beg = dflt_int; bc_y%end = dflt_int bc_z%beg = dflt_int; bc_z%end = dflt_int + bc_io = .false. 
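    ! Illustrative sketch (hypothetical sizes; face_field is a local stand-in
    ! for the code's integer_field type, assumed to wrap an allocatable 3-D
    ! integer array named sf): the bc_type allocations added above give one
    ! field per (direction, face) pair, sized over the cells tangential to
    ! that face, with the tangential extents padded by the buffer where the
    ! buffers themselves need a type.
    program bc_type_layout_sketch
        implicit none

        type :: face_field
            integer, allocatable :: sf(:, :, :)
        end type face_field

        integer, parameter :: m = 10, n = 6, p = 4, buf = 2, num_dims = 3
        type(face_field) :: bc_type(1:num_dims, -1:1)

        ! x-faces: one value per (y, z) cell
        allocate (bc_type(1, -1)%sf(0:0, 0:n, 0:p))
        allocate (bc_type(1, 1)%sf(0:0, 0:n, 0:p))
        ! y-faces: one value per (x, z) cell, x extended by the buffer
        allocate (bc_type(2, -1)%sf(-buf:m + buf, 0:0, 0:p))
        allocate (bc_type(2, 1)%sf(-buf:m + buf, 0:0, 0:p))
        ! z-faces: one value per (x, y) cell, both extended by the buffer
        allocate (bc_type(3, -1)%sf(-buf:m + buf, -buf:n + buf, 0:0))
        allocate (bc_type(3, 1)%sf(-buf:m + buf, -buf:n + buf, 0:0))

        ! e.g. mark the whole x%beg face with one (hypothetical) flag value:
        bc_type(1, -1)%sf = -3
    end program bc_type_layout_sketch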
+ num_bc_patches = dflt_int #:for DIM in ['x', 'y', 'z'] #:for DIR in [1, 2, 3] @@ -468,7 +473,7 @@ contains integer :: i, j, fac ! Setting m_root equal to m in the case of a 1D serial simulation - if (num_procs == 1 .and. n == 0) m_root = m + if (n == 0) m_root = m_glb ! Gamma/Pi_inf Model if (model_eqns == 1) then @@ -586,6 +591,11 @@ contains end if + if (bubbles_lagrange) then + beta_idx = sys_size + 1 + sys_size = beta_idx + end if + if (mhd) then B_idx%beg = sys_size + 1 if (n == 0) then @@ -762,21 +772,12 @@ contains chemxe = species_idx%end #ifdef MFC_MPI - if (bubbles_lagrange) then - allocate (MPI_IO_DATA%view(1:sys_size + 1)) - allocate (MPI_IO_DATA%var(1:sys_size + 1)) - do i = 1, sys_size + 1 - allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) - MPI_IO_DATA%var(i)%sf => null() - end do - else - allocate (MPI_IO_DATA%view(1:sys_size)) - allocate (MPI_IO_DATA%var(1:sys_size)) - do i = 1, sys_size - allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) - MPI_IO_DATA%var(i)%sf => null() - end do - end if + allocate (MPI_IO_DATA%view(1:sys_size)) + allocate (MPI_IO_DATA%var(1:sys_size)) + do i = 1, sys_size + allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) + MPI_IO_DATA%var(i)%sf => null() + end do if (ib) allocate (MPI_IO_IB_DATA%var%sf(0:m, 0:n, 0:p)) #endif @@ -924,21 +925,18 @@ contains integer :: i ! Deallocating the grid variables for the x-coordinate direction - deallocate (x_cb, x_cc, dx) + deallocate (x_cc, x_cb, dx) ! Deallocating grid variables for the y- and z-coordinate directions if (n > 0) then - - deallocate (y_cb, y_cc, dy) - - if (p > 0) deallocate (z_cb, z_cc, dz) - + deallocate (y_cc, y_cb, dy) + if (p > 0) then + deallocate (z_cc, z_cb, dz) + end if + else ! Deallocating the grid variables, only used for the 1D simulations, ! and containing the defragmented computational domain grid data - else - deallocate (x_root_cb, x_root_cc) - end if deallocate (proc_coords) @@ -953,8 +951,6 @@ contains MPI_IO_DATA%var(i)%sf => null() end do - if (bubbles_lagrange) MPI_IO_DATA%var(sys_size + 1)%sf => null() - deallocate (MPI_IO_DATA%var) deallocate (MPI_IO_DATA%view) end if diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index 2e693488f1..effe0b9bb3 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -23,14 +23,6 @@ module m_mpi_proxy implicit none - !> @name Buffers of the conservative variables received/sent from/to neighboring - !! processors. Note that these variables are structured as vectors rather - !! than arrays. - !> @{ - real(wp), allocatable, dimension(:) :: q_cons_buffer_in - real(wp), allocatable, dimension(:) :: q_cons_buffer_out - !> @} - !> @name Receive counts and displacement vector variables, respectively, used in !! enabling MPI to gather varying amounts of data from all processes to the !! root process @@ -41,7 +33,7 @@ module m_mpi_proxy !> @name Generic flags used to identify and report MPI errors !> @{ - integer, private :: err_code, ierr + integer, private :: ierr !> @} contains @@ -54,61 +46,6 @@ contains integer :: i !< Generic loop iterator - ! Allocating vectorized buffer regions of conservative variables. - ! The length of buffer vectors are set according to the size of the - ! largest buffer region in the sub-domain. - if (buff_size > 0) then - - ! Simulation is at least 2D - if (n > 0) then - - ! 
Simulation is 3D - if (p > 0) then - - allocate (q_cons_buffer_in(0:buff_size* & - sys_size* & - (m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)* & - (p + 2*buff_size + 1)/ & - (min(m, n, p) & - + 2*buff_size + 1) - 1)) - allocate (q_cons_buffer_out(0:buff_size* & - sys_size* & - (m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)* & - (p + 2*buff_size + 1)/ & - (min(m, n, p) & - + 2*buff_size + 1) - 1)) - - ! Simulation is 2D - else - - allocate (q_cons_buffer_in(0:buff_size* & - sys_size* & - (max(m, n) & - + 2*buff_size + 1) - 1)) - allocate (q_cons_buffer_out(0:buff_size* & - sys_size* & - (max(m, n) & - + 2*buff_size + 1) - 1)) - - end if - - ! Simulation is 1D - else - - allocate (q_cons_buffer_in(0:buff_size*sys_size - 1)) - allocate (q_cons_buffer_out(0:buff_size*sys_size - 1)) - - end if - - ! Initially zeroing out the vectorized buffer region variables - ! to avoid possible underflow from any unused allocated memory - q_cons_buffer_in = 0._wp - q_cons_buffer_out = 0._wp - - end if - ! Allocating and configuring the receive counts and the displacement ! vector variables used in variable-gather communication procedures. ! Note that these are only needed for either multidimensional runs @@ -201,1295 +138,6 @@ contains end subroutine s_mpi_bcast_user_inputs - !> This subroutine takes care of efficiently distributing - !! the computational domain among the available processors - !! as well as recomputing some of the global parameters so - !! that they reflect the configuration of sub-domain that - !! is overseen by the local processor. - impure subroutine s_mpi_decompose_computational_domain - -#ifdef MFC_MPI - - ! # of processors in the x-, y- and z-coordinate directions - integer :: num_procs_x, num_procs_y, num_procs_z - - ! Temporary # of processors in x-, y- and z-coordinate directions - ! used during the processor factorization optimization procedure - real(wp) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z - - ! Processor factorization (fct) minimization parameter - real(wp) :: fct_min - - ! Cartesian processor topology communicator - integer :: MPI_COMM_CART - - ! Number of remaining cells for a particular coordinate direction - ! after the bulk has evenly been distributed among the available - ! processors for that coordinate direction - integer :: rem_cells - - ! Generic loop iterators - integer :: i, j - - if (num_procs == 1 .and. parallel_io) then - do i = 1, num_dims - start_idx(i) = 0 - end do - return - end if - - ! Performing the computational domain decomposition. The procedure - ! is optimized by ensuring that each processor contains a close to - ! equivalent piece of the computational domain. Note that explicit - ! type-casting is omitted here for code legibility purposes. - - ! Generating 3D Cartesian Processor Topology - - if (n > 0) then - - if (p > 0) then - - if (cyl_coord .and. p > 0) then - ! Implement pencil processor blocking if using cylindrical coordinates so - ! that all cells in azimuthal direction are stored on a single processor. - ! This is necessary for efficient application of Fourier filter near axis. - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - num_procs_z = 1 - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! 
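    ! Illustrative sketch (hypothetical inputs; the real code requires at least
    ! num_stcls_min*weno_order cells per rank): the divisor search removed here,
    ! and its counterpart in the common decomposition routine earlier in this
    ! diff, keeps the factorization that best balances cells per rank between
    ! directions while respecting the minimum stencil width.
    program proc_factorization_sketch
        implicit none
        integer :: num_procs, m, n, min_cells
        integer :: i, px, py, best_px, best_py
        real :: fct, fct_min

        num_procs = 12; m = 199; n = 99; min_cells = 10
        best_px = 0; best_py = 0
        fct_min = huge(fct_min)

        do i = 1, num_procs
            if (mod(num_procs, i) /= 0) cycle
            px = i
            py = num_procs/i
            if ((m + 1)/px < min_cells .or. (n + 1)/py < min_cells) cycle

            ! Imbalance measure: difference in cells per rank between directions
            fct = abs(real((m + 1)/px) - real((n + 1)/py))
            if (fct < fct_min) then
                fct_min = fct
                best_px = px
                best_py = py
            end if
        end do

        if (best_px == 0) then
            print '(A)', 'No valid decomposition for these inputs.'
        else
            print '(A,I3,A,I3)', 'chosen topology: ', best_px, ' x ', best_py
        end if
    end program proc_factorization_sketch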
Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - else - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = 1 - num_procs_z = num_procs - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + 10._wp*abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - do j = 1, (num_procs/i) - - if (mod(num_procs/i, j) == 0 & - .and. & - (n + 1)/j >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = j - tmp_num_procs_z = num_procs/(i*j) - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) & - .and. & - (p + 1)/tmp_num_procs_z & - >= & - num_stcls_min*weno_order) & - then - - num_procs_x = i - num_procs_y = j - num_procs_z = num_procs/(i*j) - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - ierr = 0 - - end if - - end if - - end do - - end if - - end do - - end if - - ! Checking whether the decomposition of the computational - ! domain was successful - if (proc_rank == 0 .and. ierr == -1) then - print '(A)', 'Unable to decompose computational '// & - 'domain for selected number of '// & - 'processors. Exiting.' - call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) - end if - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, & - num_procs_y, num_procs_z/), & - (/.true., .true., .true./), & - .false., MPI_COMM_CART, ierr) - - ! Finding corresponding Cartesian coordinates of the local - ! processor rank in newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, & - proc_coords, ierr) - - ! END: Generating 3D Cartesian Processor Topology - - ! Sub-domain Global Parameters in z-direction - - ! Number of remaining cells after majority is distributed - rem_cells = mod(p + 1, num_procs_z) - - ! Optimal number of cells per processor - p = (p + 1)/num_procs_z - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(3) == i - 1) then - p = p + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(3) > 0 .or. bc_z%beg == BC_PERIODIC) then - proc_coords(3) = proc_coords(3) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%beg, ierr) - proc_coords(3) = proc_coords(3) + 1 - end if - - ! Ghost zone at the beginning - if (proc_coords(3) > 0 .and. format == 1) then - offset_z%beg = 2 - else - offset_z%beg = 0 - end if - - ! Boundary condition at the end - if (proc_coords(3) < num_procs_z - 1 .or. 
bc_z%end == BC_PERIODIC) then - proc_coords(3) = proc_coords(3) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%end, ierr) - proc_coords(3) = proc_coords(3) - 1 - end if - - ! Ghost zone at the end - if (proc_coords(3) < num_procs_z - 1 .and. format == 1) then - offset_z%end = 2 - else - offset_z%end = 0 - end if - - if (parallel_io) then - if (proc_coords(3) < rem_cells) then - start_idx(3) = (p + 1)*proc_coords(3) - else - start_idx(3) = (p + 1)*proc_coords(3) + rem_cells - end if - end if - - ! Generating 2D Cartesian Processor Topology - - else - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - ! Checking whether the decomposition of the computational - ! domain was successful - if (proc_rank == 0 .and. ierr == -1) then - print '(A)', 'Unable to decompose computational '// & - 'domain for selected number of '// & - 'processors. Exiting.' - call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) - end if - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, & - num_procs_y/), (/.true., & - .true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding corresponding Cartesian coordinates of the local - ! processor rank in newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, & - proc_coords, ierr) - - end if - - ! END: Generating 2D Cartesian Processor Topology - - ! Sub-domain Global Parameters in y-direction - - ! Number of remaining cells after majority has been distributed - rem_cells = mod(n + 1, num_procs_y) - - ! Optimal number of cells per processor - n = (n + 1)/num_procs_y - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(2) == i - 1) then - n = n + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(2) > 0 .or. bc_y%beg == BC_PERIODIC) then - proc_coords(2) = proc_coords(2) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%beg, ierr) - proc_coords(2) = proc_coords(2) + 1 - end if - - ! Ghost zone at the beginning - if (proc_coords(2) > 0 .and. format == 1) then - offset_y%beg = 2 - else - offset_y%beg = 0 - end if - - ! Boundary condition at the end - if (proc_coords(2) < num_procs_y - 1 .or. bc_y%end == BC_PERIODIC) then - proc_coords(2) = proc_coords(2) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%end, ierr) - proc_coords(2) = proc_coords(2) - 1 - end if - - ! Ghost zone at the end - if (proc_coords(2) < num_procs_y - 1 .and. 
format == 1) then - offset_y%end = 2 - else - offset_y%end = 0 - end if - - if (parallel_io) then - if (proc_coords(2) < rem_cells) then - start_idx(2) = (n + 1)*proc_coords(2) - else - start_idx(2) = (n + 1)*proc_coords(2) + rem_cells - end if - end if - - ! Generating 1D Cartesian Processor Topology - - else - - ! Number of processors in the coordinate direction is equal to - ! the total number of processors available - num_procs_x = num_procs - - ! Number of cells in undecomposed computational domain needed - ! for sub-domain reassembly during formatted data output - m_root = m - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), & - (/.true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding the corresponding Cartesian coordinates of the local - ! processor rank in the newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, & - proc_coords, ierr) - - end if - - ! Sub-domain Global Parameters in x-direction - - ! Number of remaining cells after majority has been distributed - rem_cells = mod(m + 1, num_procs_x) - - ! Optimal number of cells per processor - m = (m + 1)/num_procs_x - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(1) == i - 1) then - m = m + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(1) > 0 .or. bc_x%beg == BC_PERIODIC) then - proc_coords(1) = proc_coords(1) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) - proc_coords(1) = proc_coords(1) + 1 - end if - - ! Ghost zone at the beginning - if (proc_coords(1) > 0 .and. format == 1 .and. n > 0) then - offset_x%beg = 2 - else - offset_x%beg = 0 - end if - - ! Boundary condition at the end - if (proc_coords(1) < num_procs_x - 1 .or. bc_x%end == BC_PERIODIC) then - proc_coords(1) = proc_coords(1) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) - proc_coords(1) = proc_coords(1) - 1 - end if - - ! Ghost zone at the end - if (proc_coords(1) < num_procs_x - 1 .and. format == 1 .and. n > 0) then - offset_x%end = 2 - else - offset_x%end = 0 - end if - - if (parallel_io) then - if (proc_coords(1) < rem_cells) then - start_idx(1) = (m + 1)*proc_coords(1) - else - start_idx(1) = (m + 1)*proc_coords(1) + rem_cells - end if - end if - -#endif - - end subroutine s_mpi_decompose_computational_domain - - !> Communicates the buffer regions associated with the grid - !! variables with processors in charge of the neighboring - !! sub-domains. Note that only cell-width spacings feature - !! buffer regions so that no information relating to the - !! cell-boundary locations is communicated. - !! @param pbc_loc Processor boundary condition (PBC) location - !! @param sweep_coord Coordinate direction normal to the processor boundary - impure subroutine s_mpi_sendrecv_grid_vars_buffer_regions(pbc_loc, sweep_coord) - - character(LEN=3), intent(in) :: pbc_loc - character, intent(in) :: sweep_coord - -#ifdef MFC_MPI - - ! Communications in the x-direction - - if (sweep_coord == 'x') then - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_x%end >= 0) then - - ! Sending/receiving the data to/from bc_x%end/bc_x%beg - call MPI_SENDRECV(dx(m - buff_size + 1), buff_size, & - mpi_p, bc_x%end, 0, & - dx(-buff_size), buff_size, & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! 
Sending/receiving the data to/from bc_x%beg/bc_x%beg - call MPI_SENDRECV(dx(0), buff_size, & - mpi_p, bc_x%beg, 1, & - dx(-buff_size), buff_size, & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_x%beg >= 0) then - - ! Sending/receiving the data to/from bc_x%beg/bc_x%end - call MPI_SENDRECV(dx(0), buff_size, & - mpi_p, bc_x%beg, 1, & - dx(m + 1), buff_size, & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Sending/receiving the data to/from bc_x%end/bc_x%end - call MPI_SENDRECV(dx(m - buff_size + 1), buff_size, & - mpi_p, bc_x%end, 0, & - dx(m + 1), buff_size, & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - end if - - ! END: Communications in the x-direction - - ! Communications in the y-direction - - elseif (sweep_coord == 'y') then - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_y%end >= 0) then - - ! Sending/receiving the data to/from bc_y%end/bc_y%beg - call MPI_SENDRECV(dy(n - buff_size + 1), buff_size, & - mpi_p, bc_y%end, 0, & - dy(-buff_size), buff_size, & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! Sending/receiving the data to/from bc_y%beg/bc_y%beg - call MPI_SENDRECV(dy(0), buff_size, & - mpi_p, bc_y%beg, 1, & - dy(-buff_size), buff_size, & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_y%beg >= 0) then - - ! Sending/receiving the data to/from bc_y%beg/bc_y%end - call MPI_SENDRECV(dy(0), buff_size, & - mpi_p, bc_y%beg, 1, & - dy(n + 1), buff_size, & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Sending/receiving the data to/from bc_y%end/bc_y%end - call MPI_SENDRECV(dy(n - buff_size + 1), buff_size, & - mpi_p, bc_y%end, 0, & - dy(n + 1), buff_size, & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - end if - - ! END: Communications in the y-direction - - ! Communications in the z-direction - - else - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_z%end >= 0) then - - ! Sending/receiving the data to/from bc_z%end/bc_z%beg - call MPI_SENDRECV(dz(p - buff_size + 1), buff_size, & - mpi_p, bc_z%end, 0, & - dz(-buff_size), buff_size, & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! Sending/receiving the data to/from bc_z%beg/bc_z%beg - call MPI_SENDRECV(dz(0), buff_size, & - mpi_p, bc_z%beg, 1, & - dz(-buff_size), buff_size, & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_z%beg >= 0) then - - ! Sending/receiving the data to/from bc_z%beg/bc_z%end - call MPI_SENDRECV(dz(0), buff_size, & - mpi_p, bc_z%beg, 1, & - dz(p + 1), buff_size, & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! 
Sending/receiving the data to/from bc_z%end/bc_z%end - call MPI_SENDRECV(dz(p - buff_size + 1), buff_size, & - mpi_p, bc_z%end, 0, & - dz(p + 1), buff_size, & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - end if - - end if - - ! END: Communications in the z-direction - -#endif - - end subroutine s_mpi_sendrecv_grid_vars_buffer_regions - - !> Communicates buffer regions associated with conservative - !! variables with processors in charge of the neighboring - !! sub-domains - !! @param q_cons_vf Conservative variables - !! @param pbc_loc Processor boundary condition (PBC) location - !! @param sweep_coord Coordinate direction normal to the processor boundary - !! @param q_particle Projection of the lagrangian particles in the Eulerian framework - impure subroutine s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, pbc_loc, & - sweep_coord, q_particle) - - type(scalar_field), & - dimension(sys_size), & - intent(inout) :: q_cons_vf - - character(LEN=3), intent(in) :: pbc_loc - - character, intent(in) :: sweep_coord - - type(scalar_field), & - intent(inout), optional :: q_particle - -#ifdef MFC_MPI - - integer :: i, j, k, l, r !< Generic loop iterators - - ! Communications in the x-direction - - if (sweep_coord == 'x') then - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_x%end >= 0) then - - ! Packing the data to be sent to bc_x%end - do l = 0, p - do k = 0, n - do j = m - buff_size + 1, m - do i = 1, sys_size - r = sys_size*(j - m + buff_size - 1) & - + sys_size*buff_size*k + (i - 1) & - + sys_size*buff_size*(n + 1)*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_x%end/bc_x%beg - call MPI_SENDRECV(q_cons_buffer_out(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%end, 0, & - q_cons_buffer_in(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! Packing the data to be sent to bc_x%beg - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, sys_size - r = (i - 1) + sys_size*j & - + sys_size*buff_size*k & - + sys_size*buff_size*(n + 1)*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_x%beg/bc_x%beg - call MPI_SENDRECV(q_cons_buffer_out(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%beg, 1, & - q_cons_buffer_in(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data received from bc_x%beg - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*buff_size*k + (i - 1) & - + sys_size*buff_size*(n + 1)*l - if (present(q_particle)) then - q_particle%sf(j, k, l) = q_cons_buffer_in(r) - else - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) - end if -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - else ! Buffer region at the end - - ! 
PBC at both ends of the sub-domain - if (bc_x%beg >= 0) then - - ! Packing the data to be sent to bc_x%beg - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, sys_size - r = (i - 1) + sys_size*j & - + sys_size*buff_size*k & - + sys_size*buff_size*(n + 1)*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - call MPI_SENDRECV(q_cons_buffer_out(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%beg, 1, & - q_cons_buffer_in(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Packing the data to be sent to bc_x%end - do l = 0, p - do k = 0, n - do j = m - buff_size + 1, m - do i = 1, sys_size - r = sys_size*(j - m + buff_size - 1) & - + sys_size*buff_size*k + (i - 1) & - + sys_size*buff_size*(n + 1)*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - call MPI_SENDRECV(q_cons_buffer_out(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%end, 0, & - q_cons_buffer_in(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data received from bc_x%end - do l = 0, p - do k = 0, n - do j = m + 1, m + buff_size - do i = 1, sys_size - r = (i - 1) + sys_size*(j - m - 1) & - + sys_size*buff_size*k & - + sys_size*buff_size*(n + 1)*l - if (present(q_particle)) then - q_particle%sf(j, k, l) = q_cons_buffer_in(r) - else - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) - end if -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - end if - - ! END: Communications in the x-direction - - ! Communications in the y-direction - - elseif (sweep_coord == 'y') then - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_y%end >= 0) then - - ! Packing the data to be sent to bc_y%end - do l = 0, p - do k = n - buff_size + 1, n - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k - n + buff_size - 1) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - buff_size*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_y%end/bc_y%beg - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (p + 1), mpi_p, & - bc_y%end, 0, q_cons_buffer_in(0), & - buff_size*sys_size* & - (m + 2*buff_size + 1)*(p + 1), & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! 
Packing the data to be sent to bc_y%beg - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)*k & - + sys_size*(m + 2*buff_size + 1)* & - buff_size*l + (i - 1) - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_y%beg/bc_y%beg - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (p + 1), mpi_p, & - bc_y%beg, 1, q_cons_buffer_in(0), & - buff_size*sys_size* & - (m + 2*buff_size + 1)*(p + 1), & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data received from bc_y%beg - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = (i - 1) + sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + sys_size* & - (m + 2*buff_size + 1)*buff_size*l - if (present(q_particle)) then - q_particle%sf(j, k, l) = q_cons_buffer_in(r) - else - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) - end if -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_y%beg >= 0) then - - ! Packing the data to be sent to bc_y%beg - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)*k & - + sys_size*(m + 2*buff_size + 1)* & - buff_size*l + (i - 1) - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_y%beg/bc_y%end - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (p + 1), mpi_p, & - bc_y%beg, 1, q_cons_buffer_in(0), & - buff_size*sys_size* & - (m + 2*buff_size + 1)*(p + 1), & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Packing the data to be sent to bc_y%end - do l = 0, p - do k = n - buff_size + 1, n - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k - n + buff_size - 1) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - buff_size*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_y%end/bc_y%end - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (p + 1), mpi_p, & - bc_y%end, 0, q_cons_buffer_in(0), & - buff_size*sys_size* & - (m + 2*buff_size + 1)*(p + 1), & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! 
Unpacking the data received form bc_y%end - do l = 0, p - do k = n + 1, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = (i - 1) + sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k - n - 1) + sys_size* & - (m + 2*buff_size + 1)*buff_size*l - if (present(q_particle)) then - q_particle%sf(j, k, l) = q_cons_buffer_in(r) - else - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) - end if -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - end if - - ! END: Communications in the y-direction - - ! Communications in the z-direction - - else - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_z%end >= 0) then - - ! Packing the data to be sent to bc_z%end - do l = p - buff_size + 1, p - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + sys_size* & - (m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)* & - (l - p + buff_size - 1) + (i - 1) - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_z%end/bc_z%beg - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%end, 0, & - q_cons_buffer_in(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! Packing the data to be sent to bc_z%beg - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_z%beg/bc_z%beg - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%beg, 1, & - q_cons_buffer_in(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data from bc_z%beg - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)*(l + buff_size) - if (present(q_particle)) then - q_particle%sf(j, k, l) = q_cons_buffer_in(r) - else - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) - end if -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_z%beg >= 0) then - - ! 
Packing the data to be sent to bc_z%beg - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_z%beg/bc_z%end - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%beg, 1, & - q_cons_buffer_in(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Packing the data to be sent to bc_z%end - do l = p - buff_size + 1, p - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + sys_size* & - (m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)* & - (l - p + buff_size - 1) + (i - 1) - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_z%end/bc_z%end - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%end, 0, & - q_cons_buffer_in(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data received from bc_z%end - do l = p + 1, p + buff_size - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)*(l - p - 1) - if (present(q_particle)) then - q_particle%sf(j, k, l) = q_cons_buffer_in(r) - else - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) - end if -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - end if - - end if - - ! END: Communications in the z-direction - -#endif - - end subroutine s_mpi_sendrecv_cons_vars_buffer_regions - !> This subroutine gathers the Silo database metadata for !! the spatial extents in order to boost the performance of !! the multidimensional visualization. @@ -1654,10 +302,7 @@ contains impure subroutine s_mpi_gather_data_extents(q_sf, data_extents) real(wp), dimension(:, :, :), intent(in) :: q_sf - - real(wp), & - dimension(1:2, 0:num_procs - 1), & - intent(inout) :: data_extents + real(wp), dimension(1:2, 0:num_procs - 1), intent(inout) :: data_extents #ifdef MFC_MPI @@ -1683,13 +328,8 @@ contains !! 
@param q_root_sf Flow variable defined on the entire computational domain impure subroutine s_mpi_defragment_1d_flow_variable(q_sf, q_root_sf) - real(wp), & - dimension(0:m), & - intent(in) :: q_sf - - real(wp), & - dimension(0:m), & - intent(inout) :: q_root_sf + real(wp), dimension(0:m), intent(in) :: q_sf + real(wp), dimension(0:m), intent(inout) :: q_root_sf #ifdef MFC_MPI @@ -1709,12 +349,6 @@ contains #ifdef MFC_MPI - ! Deallocating the conservative variables buffer vectors - if (buff_size > 0) then - deallocate (q_cons_buffer_in) - deallocate (q_cons_buffer_out) - end if - ! Deallocating the receive counts and the displacement vector ! variables used in variable-gather communication procedures if ((format == 1 .and. n > 0) .or. n == 0) then diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index e7435f6ad6..0b9435b0b8 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -16,6 +16,10 @@ module m_start_up use m_mpi_proxy !< Message passing interface (MPI) module proxy + use m_mpi_common !< Common MPI subroutines + + use m_boundary_common !< Common boundary conditions subroutines + use m_variables_conversion !< Subroutines to change the state variables from !! one form to another @@ -84,7 +88,8 @@ impure subroutine s_read_input_file relax_model, cf_wrt, sigma, adv_n, ib, num_ibs, & cfl_adap_dt, cfl_const_dt, t_save, t_stop, n_start, & cfl_target, surface_tension, bubbles_lagrange, & - sim_data, hyperelasticity, Bx0, relativity, cont_damage + sim_data, hyperelasticity, Bx0, relativity, cont_damage, & + num_bc_patches ! Inquiring the status of the post_process.inp file file_loc = 'post_process.inp' @@ -115,6 +120,11 @@ impure subroutine s_read_input_file if (cfl_adap_dt .or. cfl_const_dt) cfl_dt = .true. + if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == -17) .or. & + num_bc_patches > 0) then + bc_io = .true. + end if + else call s_mpi_abort('File post_process.inp is missing. Exiting.') end if @@ -170,15 +180,10 @@ impure subroutine s_perform_time_step(t_step) ! Populating the grid and conservative variables call s_read_data_files(t_step) - ! Populating the buffer regions of the grid variables + ! Populating the buffer regions of the grid and conservative variables if (buff_size > 0) then - call s_populate_grid_variables_buffer_regions() - end if - - ! Populating the buffer regions of the conservative variables - if (buff_size > 0) then - call s_populate_conservative_variables_buffer_regions() - if (bubbles_lagrange) call s_populate_conservative_variables_buffer_regions(q_particle(1)) + call s_populate_grid_variables_buffers() + call s_populate_variables_buffers(bc_type, q_cons_vf) end if ! Initialize the Temperature cache. @@ -650,7 +655,7 @@ impure subroutine s_save_data(t_step, varname, pres, c, H) ! Adding the lagrangian subgrid variables to the formatted database file if (bubbles_lagrange) then !! Void fraction field - q_sf = 1._wp - q_particle(1)%sf( & + q_sf = 1._wp - q_cons_vf(beta_idx)%sf( & -offset_x%beg:m + offset_x%end, & -offset_y%beg:n + offset_y%end, & -offset_z%beg:p + offset_z%end) @@ -681,7 +686,11 @@ impure subroutine s_initialize_modules if (bubbles_euler .and. .not. 
polytropic) then call s_initialize_nonpoly() end if - if (num_procs > 1) call s_initialize_mpi_proxy_module() + if (num_procs > 1) then + call s_initialize_mpi_proxy_module() + call s_initialize_mpi_common_module() + end if + call s_initialize_boundary_common_module() call s_initialize_variables_conversion_module() call s_initialize_data_input_module() call s_initialize_derived_variables_module() @@ -734,7 +743,10 @@ impure subroutine s_finalize_modules call s_finalize_derived_variables_module() call s_finalize_data_input_module() call s_finalize_variables_conversion_module() - if (num_procs > 1) call s_finalize_mpi_proxy_module() + if (num_procs > 1) then + call s_finalize_mpi_proxy_module() + call s_finalize_mpi_common_module() + end if call s_finalize_global_parameters_module() ! Finalizing the MPI environment diff --git a/src/pre_process/m_boundary_conditions.fpp b/src/pre_process/m_boundary_conditions.fpp index 4fc2e5ff03..6ce2c07c24 100644 --- a/src/pre_process/m_boundary_conditions.fpp +++ b/src/pre_process/m_boundary_conditions.fpp @@ -24,9 +24,7 @@ module m_boundary_conditions real(wp) :: radius type(bounds_info) :: x_boundary, y_boundary, z_boundary !< - private; public :: s_apply_boundary_patches, & - s_write_serial_boundary_condition_files, & - s_write_parallel_boundary_condition_files + private; public :: s_apply_boundary_patches contains impure subroutine s_line_segment_bc(patch_id, bc_type) @@ -264,142 +262,4 @@ contains end subroutine s_apply_boundary_patches - impure subroutine s_write_serial_boundary_condition_files(q_prim_vf, bc_type, step_dirpath) - - type(scalar_field), dimension(sys_size) :: q_prim_vf - type(integer_field), dimension(1:num_dims, -1:1) :: bc_type - - character(LEN=*), intent(in) :: step_dirpath - - integer :: dir, loc - character(len=path_len) :: file_path - - character(len=10) :: status - - if (old_grid) then - status = 'old' - else - status = 'new' - end if - - call s_pack_boundary_condition_buffers(q_prim_vf) - - file_path = trim(step_dirpath)//'/bc_type.dat' - open (1, FILE=trim(file_path), FORM='unformatted', STATUS=status) - do dir = 1, num_dims - do loc = -1, 1, 2 - write (1) bc_type(dir, loc)%sf - end do - end do - close (1) - - file_path = trim(step_dirpath)//'/bc_buffers.dat' - open (1, FILE=trim(file_path), FORM='unformatted', STATUS=status) - do dir = 1, num_dims - do loc = -1, 1, 2 - write (1) bc_buffers(dir, loc)%sf - end do - end do - close (1) - - end subroutine s_write_serial_boundary_condition_files - - impure subroutine s_write_parallel_boundary_condition_files(q_prim_vf, bc_type) - - type(scalar_field), dimension(sys_size) :: q_prim_vf - type(integer_field), dimension(1:num_dims, -1:1) :: bc_type - - integer :: dir, loc - character(len=path_len) :: file_loc, file_path - -#ifdef MFC_MPI - integer :: ierr - integer :: file_id - integer :: offset - character(len=7) :: proc_rank_str - logical :: dir_check - - call s_pack_boundary_condition_buffers(q_prim_vf) - - file_loc = trim(case_dir)//'/restart_data/boundary_conditions' - if (proc_rank == 0) then - call my_inquire(file_loc, dir_check) - if (dir_check .neqv. .true.) then - call s_create_directory(trim(file_loc)) - end if - end if - - call s_create_mpi_types(bc_type) - - call s_mpi_barrier() - - call DelayFileAccess(proc_rank) - - write (proc_rank_str, '(I7.7)') proc_rank - file_path = trim(file_loc)//'/bc_'//trim(proc_rank_str)//'.dat' - call MPI_File_open(MPI_COMM_SELF, trim(file_path), MPI_MODE_CREATE + MPI_MODE_WRONLY, MPI_INFO_NULL, file_id, ierr) - - offset = 0 - - ! 
Write bc_types - do dir = 1, num_dims - do loc = -1, 1, 2 - call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), MPI_INTEGER, MPI_BC_TYPE_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) - call MPI_File_write_all(file_id, bc_type(dir, loc)%sf, 1, MPI_BC_TYPE_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) - offset = offset + sizeof(bc_type(dir, loc)%sf) - end do - end do - - ! Write bc_buffers - do dir = 1, num_dims - do loc = -1, 1, 2 - call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_p, MPI_BC_BUFFER_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) - call MPI_File_write_all(file_id, bc_buffers(dir, loc)%sf, 1, MPI_BC_BUFFER_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) - offset = offset + sizeof(bc_buffers(dir, loc)%sf) - end do - end do - - call MPI_File_close(file_id, ierr) -#endif - - end subroutine s_write_parallel_boundary_condition_files - - impure subroutine s_pack_boundary_condition_buffers(q_prim_vf) - - type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf - integer :: i, j, k - - do k = 0, p - do j = 0, n - do i = 1, sys_size - bc_buffers(1, -1)%sf(i, j, k) = q_prim_vf(i)%sf(0, j, k) - bc_buffers(1, 1)%sf(i, j, k) = q_prim_vf(i)%sf(m, j, k) - end do - end do - end do - - if (n > 0) then - do k = 0, p - do j = 1, sys_size - do i = 0, m - bc_buffers(2, -1)%sf(i, j, k) = q_prim_vf(j)%sf(i, 0, k) - bc_buffers(2, 1)%sf(i, j, k) = q_prim_vf(j)%sf(i, n, k) - end do - end do - end do - - if (p > 0) then - do k = 1, sys_size - do j = 0, n - do i = 0, m - bc_buffers(3, -1)%sf(i, j, k) = q_prim_vf(k)%sf(i, j, 0) - bc_buffers(3, 1)%sf(i, j, k) = q_prim_vf(k)%sf(i, j, p) - end do - end do - end do - end if - end if - - end subroutine s_pack_boundary_condition_buffers - end module m_boundary_conditions diff --git a/src/pre_process/m_data_output.fpp b/src/pre_process/m_data_output.fpp index 86cc884eb0..b703baf50f 100644 --- a/src/pre_process/m_data_output.fpp +++ b/src/pre_process/m_data_output.fpp @@ -159,7 +159,7 @@ contains end if if (bc_io) then - call s_write_serial_boundary_condition_files(q_prim_vf, bc_type, t_step_dir) + call s_write_serial_boundary_condition_files(q_prim_vf, bc_type, t_step_dir, old_grid) end if ! x-coordinate direction diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp index 3950fdb42d..b9d60e2ac1 100644 --- a/src/pre_process/m_mpi_proxy.fpp +++ b/src/pre_process/m_mpi_proxy.fpp @@ -23,20 +23,8 @@ module m_mpi_proxy implicit none - integer, private :: err_code, ierr, v_size !< - !! Generic flags used to identify and report MPI errors - - real(wp), private, allocatable, dimension(:), target :: q_prims_buff_send !< - !! This variable is utilized to pack and send the buffer of the cell-average - !! primitive variables, for a single computational domain boundary at the - !! time, to the relevant neighboring processor. - - real(wp), private, allocatable, dimension(:), target :: q_prims_buff_recv !< - !! q_prims_buff_recv is utilized to receive and unpack the buffer of the cell- - !! average primitive variables, for a single computational domain boundary - !! at the time, from the relevant neighboring processor. - - ! integer :: halo_size + integer, private :: ierr !< + !! Generic flag used to identify and report MPI errors contains !> Since only processor with rank 0 is in charge of reading @@ -159,458 +147,5 @@ contains end subroutine s_mpi_bcast_user_inputs - !> Description: This subroutine takes care of efficiently distributing - !! the computational domain among the available processors - !! 
as well as recomputing some of the global parameters so - !! that they reflect the configuration of sub-domain that is - !! overseen by the local processor. - impure subroutine s_mpi_decompose_computational_domain - -#ifdef MFC_MPI - - ! # of processors in the x-, y- and z-coordinate directions - integer :: num_procs_x, num_procs_y, num_procs_z - - ! Temporary # of processors in x-, y- and z-coordinate directions - ! used during the processor factorization optimization procedure - real(wp) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z - - ! Processor factorization (fct) minimization parameter - real(wp) :: fct_min - - ! Cartesian processor topology communicator - integer :: MPI_COMM_CART - - ! Number of remaining cells for a particular coordinate direction - ! after the bulk has evenly been distributed among the available - ! processors for that coordinate direction - integer :: rem_cells - - ! Generic loop iterators - integer :: i, j - - if (num_procs == 1 .and. parallel_io) then - do i = 1, num_dims - start_idx(i) = 0 - end do - return - end if - - ! Performing the computational domain decomposition. The procedure - ! is optimized by ensuring that each processor contains a close to - ! equivalent piece of the computational domain. Note that explicit - ! type-casting is omitted here for code legibility purposes. - - ! Generating 3D Cartesian Processor Topology - - if (n > 0) then - - if (p > 0) then - - if (cyl_coord .and. p > 0) then - ! Implement pencil processor blocking if using cylindrical coordinates so - ! that all cells in azimuthal direction are stored on a single processor. - ! This is necessary for efficient application of Fourier filter near axis. - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - num_procs_z = 1 - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - else - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = 1 - num_procs_z = num_procs - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + 10._wp*abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - do j = 1, (num_procs/i) - - if (mod(num_procs/i, j) == 0 & - .and. 
& - (n + 1)/j >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = j - tmp_num_procs_z = num_procs/(i*j) - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) & - .and. & - (p + 1)/tmp_num_procs_z & - >= & - num_stcls_min*weno_order) & - then - - num_procs_x = i - num_procs_y = j - num_procs_z = num_procs/(i*j) - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - ierr = 0 - - end if - - end if - - end do - - end if - - end do - - end if - - ! Checking whether the decomposition of the computational - ! domain was successful - if (proc_rank == 0 .and. ierr == -1) then - print '(A)', 'Unable to decompose computational '// & - 'domain for selected number of '// & - 'processors. Exiting.' - call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) - end if - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, & - num_procs_y, num_procs_z/), & - (/.true., .true., .true./), & - .false., MPI_COMM_CART, ierr) - - ! Finding corresponding Cartesian coordinates of the local - ! processor rank in newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, & - proc_coords, ierr) - - ! END: Generating 3D Cartesian Processor Topology - - ! Sub-domain Global Parameters in z-direction - - ! Number of remaining cells after majority is distributed - rem_cells = mod(p + 1, num_procs_z) - - ! Preliminary uniform cell-width spacing - if (old_grid .neqv. .true.) then - dz = (z_domain%end - z_domain%beg)/real(p + 1, wp) - end if - - ! Optimal number of cells per processor - p = (p + 1)/num_procs_z - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(3) == i - 1) then - p = p + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(3) > 0 .or. (bc_z%beg == BC_PERIODIC .and. num_procs_z > 1)) then - proc_coords(3) = proc_coords(3) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%beg, ierr) - proc_coords(3) = proc_coords(3) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(3) < num_procs_z - 1 .or. (bc_z%end == BC_PERIODIC .and. num_procs_z > 1)) then - proc_coords(3) = proc_coords(3) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%end, ierr) - proc_coords(3) = proc_coords(3) - 1 - end if - - ! Beginning and end sub-domain boundary locations - if (parallel_io .neqv. .true.) then - if (old_grid .neqv. .true.) then - if (proc_coords(3) < rem_cells) then - z_domain%beg = z_domain%beg + dz*real((p + 1)* & - proc_coords(3)) - z_domain%end = z_domain%end - dz*real((p + 1)* & - (num_procs_z - proc_coords(3) - 1) & - - (num_procs_z - rem_cells)) - else - z_domain%beg = z_domain%beg + dz*real((p + 1)* & - proc_coords(3) + rem_cells) - z_domain%end = z_domain%end - dz*real((p + 1)* & - (num_procs_z - proc_coords(3) - 1)) - end if - end if - else - if (proc_coords(3) < rem_cells) then - start_idx(3) = (p + 1)*proc_coords(3) - else - start_idx(3) = (p + 1)*proc_coords(3) + rem_cells - end if - end if - - ! Generating 2D Cartesian Processor Topology - - else - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - ierr = -1 - - ! 
Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - ! Checking whether the decomposition of the computational - ! domain was successful - if (proc_rank == 0 .and. ierr == -1) then - print '(A)', 'Unable to decompose computational '// & - 'domain for selected number of '// & - 'processors. Exiting.' - call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) - end if - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, & - num_procs_y/), (/.true., & - .true./), .false., MPI_COMM_CART, & - ierr) - ! Finding corresponding Cartesian coordinates of the local - ! processor rank in newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, & - proc_coords, ierr) - - end if - - ! END: Generating 2D Cartesian Processor Topology - - ! Sub-domain Global Parameters in y-direction - - ! Number of remaining cells after majority has been distributed - rem_cells = mod(n + 1, num_procs_y) - - ! Preliminary uniform cell-width spacing - if (old_grid .neqv. .true.) then - dy = (y_domain%end - y_domain%beg)/real(n + 1, wp) - end if - - ! Optimal number of cells per processor - n = (n + 1)/num_procs_y - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(2) == i - 1) then - n = n + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(2) > 0 .or. (bc_y%beg == BC_PERIODIC .and. num_procs_y > 1)) then - proc_coords(2) = proc_coords(2) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%beg, ierr) - proc_coords(2) = proc_coords(2) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(2) < num_procs_y - 1 .or. (bc_y%end == BC_PERIODIC .and. num_procs_y > 1)) then - proc_coords(2) = proc_coords(2) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%end, ierr) - proc_coords(2) = proc_coords(2) - 1 - end if - - ! Beginning and end sub-domain boundary locations - if (parallel_io .neqv. .true.) then - if (old_grid .neqv. .true.) then - if (proc_coords(2) < rem_cells) then - y_domain%beg = y_domain%beg + dy*real((n + 1)* & - proc_coords(2)) - y_domain%end = y_domain%end - dy*real((n + 1)* & - (num_procs_y - proc_coords(2) - 1) & - - (num_procs_y - rem_cells)) - else - y_domain%beg = y_domain%beg + dy*real((n + 1)* & - proc_coords(2) + rem_cells) - y_domain%end = y_domain%end - dy*real((n + 1)* & - (num_procs_y - proc_coords(2) - 1)) - end if - end if - else - if (proc_coords(2) < rem_cells) then - start_idx(2) = (n + 1)*proc_coords(2) - else - start_idx(2) = (n + 1)*proc_coords(2) + rem_cells - end if - end if - - ! Generating 1D Cartesian Processor Topology - - else - - ! Number of processors in the coordinate direction is equal to - ! the total number of processors available - num_procs_x = num_procs - - ! 
Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), & - (/.true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding the corresponding Cartesian coordinates of the local - ! processor rank in the newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, & - proc_coords, ierr) - - end if - - ! Sub-domain Global Parameters in x-direction - - ! Number of remaining cells after majority has been distributed - rem_cells = mod(m + 1, num_procs_x) - - ! Preliminary uniform cell-width spacing - if (old_grid .neqv. .true.) then - dx = (x_domain%end - x_domain%beg)/real(m + 1, wp) - end if - - ! Optimal number of cells per processor - m = (m + 1)/num_procs_x - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(1) == i - 1) then - m = m + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(1) > 0 .or. (bc_x%beg == BC_PERIODIC .and. num_procs_x > 1)) then - proc_coords(1) = proc_coords(1) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) - proc_coords(1) = proc_coords(1) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(1) < num_procs_x - 1 .or. (bc_x%end == BC_PERIODIC .and. num_procs_x > 1)) then - proc_coords(1) = proc_coords(1) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) - proc_coords(1) = proc_coords(1) - 1 - end if - - ! Beginning and end sub-domain boundary locations - if (parallel_io .neqv. .true.) then - if (old_grid .neqv. .true.) then - if (proc_coords(1) < rem_cells) then - x_domain%beg = x_domain%beg + dx*real((m + 1)* & - proc_coords(1)) - x_domain%end = x_domain%end - dx*real((m + 1)* & - (num_procs_x - proc_coords(1) - 1) & - - (num_procs_x - rem_cells)) - else - x_domain%beg = x_domain%beg + dx*real((m + 1)* & - proc_coords(1) + rem_cells) - x_domain%end = x_domain%end - dx*real((m + 1)* & - (num_procs_x - proc_coords(1) - 1)) - end if - end if - else - if (proc_coords(1) < rem_cells) then - start_idx(1) = (m + 1)*proc_coords(1) - else - start_idx(1) = (m + 1)*proc_coords(1) + rem_cells - end if - end if - -#endif - - end subroutine s_mpi_decompose_computational_domain end module m_mpi_proxy diff --git a/src/pre_process/m_perturbation.fpp b/src/pre_process/m_perturbation.fpp index f4909e89ee..209c752287 100644 --- a/src/pre_process/m_perturbation.fpp +++ b/src/pre_process/m_perturbation.fpp @@ -32,8 +32,6 @@ contains impure subroutine s_initialize_perturbation_module() - bcxb = bc_x%beg; bcxe = bc_x%end; bcyb = bc_y%beg; bcye = bc_y%end; bczb = bc_z%beg; bcze = bc_z%end - if (mixlayer_perturb) then mixlayer_bc_fd = 2 nbp = n + 2 @@ -624,7 +622,7 @@ contains do q = 1, elliptic_smoothing_iters ! Communication of buffer regions and apply boundary conditions - call s_populate_variables_buffers(q_prim_vf, pb%sf, mv%sf, bc_type) + call s_populate_variables_buffers(bc_type, q_prim_vf, pb%sf, mv%sf) ! Perform smoothing and store in temp array if (n == 0) then diff --git a/src/simulation/m_boundary_conditions.fpp b/src/simulation/m_boundary_conditions.fpp deleted file mode 100644 index d0666e2ca7..0000000000 --- a/src/simulation/m_boundary_conditions.fpp +++ /dev/null @@ -1,125 +0,0 @@ -! @file m_boundary_conditions.fpp -! 
@brief Contains module m_boundary_conditions - -!> @brief This module contains -module m_boundary_conditions - - use m_derived_types - - use m_global_parameters -#ifdef MFC_MPI - use mpi -#endif - use m_delay_file_access - - use m_compile_specific - - use m_boundary_common - -contains - - impure subroutine s_read_serial_boundary_condition_files(step_dirpath, bc_type) - - character(LEN=*), intent(in) :: step_dirpath - - type(integer_field), dimension(1:num_dims, -1:1), intent(inout) :: bc_type - - integer :: dir, loc - logical :: file_exist - character(len=path_len) :: file_path - - ! Read bc_types - file_path = trim(step_dirpath)//'/bc_type.dat' - inquire (FILE=trim(file_path), EXIST=file_exist) - if (.not. file_exist) then - call s_mpi_abort(trim(file_path)//' is missing. Exiting.') - end if - - open (1, FILE=trim(file_path), FORM='unformatted', STATUS='unknown') - do dir = 1, num_dims - do loc = -1, 1, 2 - read (1) bc_type(dir, loc)%sf - !$acc update device(bc_type(dir, loc)%sf) - end do - end do - close (1) - - ! Read bc_buffers - file_path = trim(step_dirpath)//'/bc_buffers.dat' - inquire (FILE=trim(file_path), EXIST=file_exist) - if (.not. file_exist) then - call s_mpi_abort(trim(file_path)//' is missing. Exiting.') - end if - - open (1, FILE=trim(file_path), FORM='unformatted', STATUS='unknown') - do dir = 1, num_dims - do loc = -1, 1, 2 - read (1) bc_buffers(dir, loc)%sf - !$acc update device(bc_buffers(dir, loc)%sf) - end do - end do - close (1) - - end subroutine s_read_serial_boundary_condition_files - - impure subroutine s_read_parallel_boundary_condition_files(bc_type) - - type(integer_field), dimension(1:num_dims, -1:1), intent(inout) :: bc_type - - integer :: dir, loc - character(len=path_len) :: file_loc, file_path - -#ifdef MFC_MPI - integer :: ierr - integer :: file_id - integer :: offset - character(len=7) :: proc_rank_str - logical :: dir_check - - file_loc = trim(case_dir)//'/restart_data/boundary_conditions' - - if (proc_rank == 0) then - call my_inquire(file_loc, dir_check) - if (dir_check .neqv. .true.) then - call s_mpi_abort(trim(file_loc)//' is missing. Exiting.') - end if - end if - - call s_create_mpi_types(bc_type) - - call s_mpi_barrier() - - call DelayFileAccess(proc_rank) - - write (proc_rank_str, '(I7.7)') proc_rank - file_path = trim(file_loc)//'/bc_'//trim(proc_rank_str)//'.dat' - call MPI_File_open(MPI_COMM_SELF, trim(file_path), MPI_MODE_RDONLY, MPI_INFO_NULL, file_id, ierr) - - offset = 0 - - ! Read bc_types - do dir = 1, num_dims - do loc = -1, 1, 2 - call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), MPI_INTEGER, MPI_BC_TYPE_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) - call MPI_File_read_all(file_id, bc_type(dir, loc)%sf, 1, MPI_BC_TYPE_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) - offset = offset + sizeof(bc_type(dir, loc)%sf) - !$acc update device(bc_type(dir, loc)%sf) - end do - end do - - ! 
Read bc_buffers - do dir = 1, num_dims - do loc = -1, 1, 2 - call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_p, MPI_BC_BUFFER_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) - call MPI_File_read_all(file_id, bc_buffers(dir, loc)%sf, 1, MPI_BC_BUFFER_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) - offset = offset + sizeof(bc_buffers(dir, loc)%sf) - !$acc update device(bc_buffers(dir, loc)%sf) - end do - end do - - call MPI_File_close(file_id, ierr) -#endif - - end subroutine s_read_parallel_boundary_condition_files - -end module m_boundary_conditions diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index d0b94f6b7a..9362f013ad 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -190,8 +190,6 @@ module m_global_parameters integer :: num_bc_patches logical :: bc_io - integer :: BC_RIEMANN_EXTRAPOLATION - integer :: BC_GHOST_EXTRAPOLATION !> @name Boundary conditions (BC) in the x-, y- and z-directions, respectively !> @{ type(int_bounds_info) :: bc_x, bc_y, bc_z @@ -584,8 +582,6 @@ contains num_bc_patches = 0 bc_io = .false. - BC_RIEMANN_EXTRAPOLATION = -4 - BC_GHOST_EXTRAPOLATION = -3 bc_x%beg = dflt_int; bc_x%end = dflt_int bc_y%beg = dflt_int; bc_y%end = dflt_int diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index c7c96890aa..0314aad864 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -85,7 +85,7 @@ contains !$acc update device(levelset_norm%sf) ! Get neighboring IB variables from other processors - call s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers) + call s_populate_ib_buffers() !$acc update host(ib_markers%sf) @@ -108,6 +108,18 @@ contains end subroutine s_ibm_setup + subroutine s_populate_ib_buffers() + + #:for DIRC, DIRI in [('x', 1), ('y', 2), ('z', 3)] + #:for LOCC, LOCI in [('beg', -1), ('end', 1)] + if (bc_${DIRC}$%${LOCC}$ >= 0) then + call s_mpi_sendrecv_ib_buffers(ib_markers, ${DIRI}$, ${LOCI}$) + end if + #:endfor + #:endfor + + end subroutine s_populate_ib_buffers + !> Subroutine that updates the conservative variables at the ghost points !! @param q_cons_vf Conservative Variables !! @param q_prim_vf Primitive variables diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index f931227f4d..ca4a4e3e8e 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -32,24 +32,52 @@ module m_mpi_proxy implicit none - integer, private, allocatable, dimension(:), target :: ib_buff_send !< + integer, private, allocatable, dimension(:) :: ib_buff_send !< !! This variable is utilized to pack and send the buffer of the immersed !! boundary markers, for a single computational domain boundary at the !! time, to the relevant neighboring processor. - integer, private, allocatable, dimension(:), target :: ib_buff_recv !< + integer, private, allocatable, dimension(:) :: ib_buff_recv !< !! q_cons_buff_recv is utilized to receive and unpack the buffer of the !! immersed boundary markers, for a single computational domain boundary !! at the time, from the relevant neighboring processor. 
!> @name Generic flags used to identify and report MPI errors !> @{ - integer, private :: err_code, ierr, v_size + integer, private :: ierr !> @} - !$acc declare create(v_size) + + integer :: i_halo_size + !$acc declare create(i_halo_size) contains + subroutine s_initialize_mpi_proxy_module() + +#ifdef MFC_MPI + if (ib) then + if (n > 0) then + if (p > 0) then + i_halo_size = -1 + gp_layers* & + & (m + 2*gp_layers + 1)* & + & (n + 2*gp_layers + 1)* & + & (p + 2*gp_layers + 1)/ & + & (min(m, n, p) + 2*gp_layers + 1) + else + i_halo_size = -1 + gp_layers* & + & (max(m, n) + 2*gp_layers + 1) + end if + else + i_halo_size = -1 + gp_layers + end if + + !$acc update device(i_halo_size) + @:ALLOCATE(ib_buff_send(0:i_halo_size), ib_buff_recv(0:i_halo_size)) + end if +#endif + + end subroutine s_initialize_mpi_proxy_module + !> Since only the processor with rank 0 reads and verifies !! the consistency of user inputs, these are initially not !! available to the other processors. Then, the purpose of @@ -207,1405 +235,155 @@ contains end subroutine s_mpi_bcast_user_inputs - !> The purpose of this procedure is to optimally decompose - !! the computational domain among the available processors. - !! This is performed by attempting to award each processor, - !! in each of the coordinate directions, approximately the - !! same number of cells, and then recomputing the affected - !! global parameters. - impure subroutine s_mpi_decompose_computational_domain - -#ifdef MFC_MPI - - integer :: num_procs_x, num_procs_y, num_procs_z !< - !! Optimal number of processors in the x-, y- and z-directions - - real(wp) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z !< - !! Non-optimal number of processors in the x-, y- and z-directions - - real(wp) :: fct_min !< - !! Processor factorization (fct) minimization parameter - - integer :: MPI_COMM_CART !< - !! Cartesian processor topology communicator - - integer :: rem_cells !< - !! Remaining number of cells, in a particular coordinate direction, - !! after the majority is divided up among the available processors - - integer :: i, j !< Generic loop iterators - - if (num_procs == 1 .and. parallel_io) then - do i = 1, num_dims - start_idx(i) = 0 - end do - return - end if - - ! 3D Cartesian Processor Topology - if (n > 0) then - - if (p > 0) then - - if (cyl_coord .and. p > 0) then - ! Implement pencil processor blocking if using cylindrical coordinates so - ! that all cells in azimuthal direction are stored on a single processor. - ! This is necessary for efficient application of Fourier filter near axis. - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - num_procs_z = 1 - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - else - - ! 
Initial estimate of optimal processor topology - num_procs_x = 1 - num_procs_y = 1 - num_procs_z = num_procs - ierr = -1 - - ! Benchmarking the quality of this initial guess - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + 10._wp*abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - - ! Optimization of the initial processor topology - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - do j = 1, num_procs/i - - if (mod(num_procs/i, j) == 0 & - .and. & - (n + 1)/j >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = j - tmp_num_procs_z = num_procs/(i*j) - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) & - .and. & - (p + 1)/tmp_num_procs_z & - >= & - num_stcls_min*weno_order) & - then - - num_procs_x = i - num_procs_y = j - num_procs_z = num_procs/(i*j) - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - ierr = 0 - - end if - - end if - - end do - - end if - - end do - - end if - - ! Verifying that a valid decomposition of the computational - ! domain has been established. If not, the simulation exits. - if (proc_rank == 0 .and. ierr == -1) then - call s_mpi_abort('Unsupported combination of values '// & - 'of num_procs, m, n, p and '// & - 'weno_order. Exiting.') - end if - - ! Creating new communicator using the Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, & - num_procs_y, num_procs_z/), & - (/.true., .true., .true./), & - .false., MPI_COMM_CART, ierr) - - ! Finding the Cartesian coordinates of the local process - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, & - proc_coords, ierr) - ! END: 3D Cartesian Processor Topology - - ! Global Parameters for z-direction - - ! Number of remaining cells - rem_cells = mod(p + 1, num_procs_z) - - ! Optimal number of cells per processor - p = (p + 1)/num_procs_z - 1 - - ! Distributing the remaining cells - do i = 1, rem_cells - if (proc_coords(3) == i - 1) then - p = p + 1; exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(3) > 0 .or. (bc_z%beg == BC_PERIODIC .and. num_procs_z > 1)) then - proc_coords(3) = proc_coords(3) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%beg, ierr) - proc_coords(3) = proc_coords(3) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(3) < num_procs_z - 1 .or. (bc_z%end == BC_PERIODIC .and. num_procs_z > 1)) then - proc_coords(3) = proc_coords(3) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%end, ierr) - proc_coords(3) = proc_coords(3) - 1 - end if - - if (parallel_io) then - if (proc_coords(3) < rem_cells) then - start_idx(3) = (p + 1)*proc_coords(3) - else - start_idx(3) = (p + 1)*proc_coords(3) + rem_cells - end if - end if - - ! 2D Cartesian Processor Topology - else - - ! Initial estimate of optimal processor topology - num_procs_x = 1 - num_procs_y = num_procs - ierr = -1 - - ! Benchmarking the quality of this initial guess - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Optimization of the initial processor topology - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. 
& - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - ! Verifying that a valid decomposition of the computational - ! domain has been established. If not, the simulation exits. - if (proc_rank == 0 .and. ierr == -1) then - call s_mpi_abort('Unsupported combination of values '// & - 'of num_procs, m, n and '// & - 'weno_order. Exiting.') - end if - - ! Creating new communicator using the Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, & - num_procs_y/), (/.true., & - .true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding the Cartesian coordinates of the local process - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, & - proc_coords, ierr) - - end if - ! END: 2D Cartesian Processor Topology - - ! Global Parameters for y-direction - - ! Number of remaining cells - rem_cells = mod(n + 1, num_procs_y) - - ! Optimal number of cells per processor - n = (n + 1)/num_procs_y - 1 - - ! Distributing the remaining cells - do i = 1, rem_cells - if (proc_coords(2) == i - 1) then - n = n + 1; exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(2) > 0 .or. (bc_y%beg == BC_PERIODIC .and. num_procs_y > 1)) then - proc_coords(2) = proc_coords(2) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%beg, ierr) - proc_coords(2) = proc_coords(2) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(2) < num_procs_y - 1 .or. (bc_y%end == BC_PERIODIC .and. num_procs_y > 1)) then - proc_coords(2) = proc_coords(2) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%end, ierr) - proc_coords(2) = proc_coords(2) - 1 - end if - - if (parallel_io) then - if (proc_coords(2) < rem_cells) then - start_idx(2) = (n + 1)*proc_coords(2) - else - start_idx(2) = (n + 1)*proc_coords(2) + rem_cells - end if - end if - - ! 1D Cartesian Processor Topology - else - - ! Optimal processor topology - num_procs_x = num_procs - - ! Creating new communicator using the Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), & - (/.true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding the Cartesian coordinates of the local process - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, & - proc_coords, ierr) - - end if - - ! Global Parameters for x-direction - - ! Number of remaining cells - rem_cells = mod(m + 1, num_procs_x) - - ! Optimal number of cells per processor - m = (m + 1)/num_procs_x - 1 - - ! Distributing the remaining cells - do i = 1, rem_cells - if (proc_coords(1) == i - 1) then - m = m + 1; exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(1) > 0 .or. (bc_x%beg == BC_PERIODIC .and. num_procs_x > 1)) then - proc_coords(1) = proc_coords(1) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) - proc_coords(1) = proc_coords(1) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(1) < num_procs_x - 1 .or. (bc_x%end == BC_PERIODIC .and. 
num_procs_x > 1)) then - proc_coords(1) = proc_coords(1) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) - proc_coords(1) = proc_coords(1) - 1 - end if - - if (parallel_io) then - if (proc_coords(1) < rem_cells) then - start_idx(1) = (m + 1)*proc_coords(1) - else - start_idx(1) = (m + 1)*proc_coords(1) + rem_cells - end if - end if - -#endif - - end subroutine s_mpi_decompose_computational_domain - - !> The goal of this procedure is to populate the buffers of - !! the grid variables by communicating with the neighboring - !! processors. Note that only the buffers of the cell-width - !! distributions are handled in such a way. This is because - !! the buffers of cell-boundary locations may be calculated - !! directly from those of the cell-width distributions. - !! @param mpi_dir MPI communication coordinate direction - !! @param pbc_loc Processor boundary condition (PBC) location - impure subroutine s_mpi_sendrecv_grid_variables_buffers(mpi_dir, pbc_loc) - - integer, intent(in) :: mpi_dir - integer, intent(in) :: pbc_loc - -#ifdef MFC_MPI - - ! MPI Communication in x-direction - if (mpi_dir == 1) then - - if (pbc_loc == -1) then ! PBC at the beginning - - if (bc_x%end >= 0) then ! PBC at the beginning and end - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - dx(m - buff_size + 1), buff_size, & - mpi_p, bc_x%end, 0, & - dx(-buff_size), buff_size, & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the beginning only - - ! Send/receive buffer to/from bc_x%beg/bc_x%beg - call MPI_SENDRECV( & - dx(0), buff_size, & - mpi_p, bc_x%beg, 1, & - dx(-buff_size), buff_size, & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - else ! PBC at the end - - if (bc_x%beg >= 0) then ! PBC at the end and beginning - - ! Send/receive buffer to/from bc_x%beg/bc_x%end - call MPI_SENDRECV( & - dx(0), buff_size, & - mpi_p, bc_x%beg, 1, & - dx(m + 1), buff_size, & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the end only - - ! Send/receive buffer to/from bc_x%end/bc_x%end - call MPI_SENDRECV( & - dx(m - buff_size + 1), buff_size, & - mpi_p, bc_x%end, 0, & - dx(m + 1), buff_size, & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - end if - ! END: MPI Communication in x-direction - - ! MPI Communication in y-direction - elseif (mpi_dir == 2) then - - if (pbc_loc == -1) then ! PBC at the beginning - - if (bc_y%end >= 0) then ! PBC at the beginning and end - - ! Send/receive buffer to/from bc_y%end/bc_y%beg - call MPI_SENDRECV( & - dy(n - buff_size + 1), buff_size, & - mpi_p, bc_y%end, 0, & - dy(-buff_size), buff_size, & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the beginning only - - ! Send/receive buffer to/from bc_y%beg/bc_y%beg - call MPI_SENDRECV( & - dy(0), buff_size, & - mpi_p, bc_y%beg, 1, & - dy(-buff_size), buff_size, & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - else ! PBC at the end - - if (bc_y%beg >= 0) then ! PBC at the end and beginning - - ! Send/receive buffer to/from bc_y%beg/bc_y%end - call MPI_SENDRECV( & - dy(0), buff_size, & - mpi_p, bc_y%beg, 1, & - dy(n + 1), buff_size, & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the end only - - ! 
Send/receive buffer to/from bc_y%end/bc_y%end - call MPI_SENDRECV( & - dy(n - buff_size + 1), buff_size, & - mpi_p, bc_y%end, 0, & - dy(n + 1), buff_size, & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - end if - ! END: MPI Communication in y-direction - - ! MPI Communication in z-direction - else - - if (pbc_loc == -1) then ! PBC at the beginning - - if (bc_z%end >= 0) then ! PBC at the beginning and end - - ! Send/receive buffer to/from bc_z%end/bc_z%beg - call MPI_SENDRECV( & - dz(p - buff_size + 1), buff_size, & - mpi_p, bc_z%end, 0, & - dz(-buff_size), buff_size, & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the beginning only - - ! Send/receive buffer to/from bc_z%beg/bc_z%beg - call MPI_SENDRECV( & - dz(0), buff_size, & - mpi_p, bc_z%beg, 1, & - dz(-buff_size), buff_size, & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - else ! PBC at the end - - if (bc_z%beg >= 0) then ! PBC at the end and beginning - - ! Send/receive buffer to/from bc_z%beg/bc_z%end - call MPI_SENDRECV( & - dz(0), buff_size, & - mpi_p, bc_z%beg, 1, & - dz(p + 1), buff_size, & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the end only - - ! Send/receive buffer to/from bc_z%end/bc_z%end - call MPI_SENDRECV( & - dz(p - buff_size + 1), buff_size, & - mpi_p, bc_z%end, 0, & - dz(p + 1), buff_size, & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - end if - - end if - ! END: MPI Communication in z-direction - -#endif - - end subroutine s_mpi_sendrecv_grid_variables_buffers - - !> The goal of this procedure is to populate the buffers of - !! the cell-average conservative variables by communicating - !! with the neighboring processors. - impure subroutine s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers) + subroutine s_mpi_sendrecv_ib_buffers(ib_markers, mpi_dir, pbc_loc) type(integer_field), intent(inout) :: ib_markers - integer, intent(in) :: gp_layers - - integer :: j, k, l, r !< Generic loop iterators - integer, pointer, dimension(:) :: p_i_send, p_i_recv - -#ifdef MFC_MPI - - if (n > 0) then - if (p > 0) then - @:ALLOCATE(ib_buff_send(0:-1 + gp_layers * & - & (m + 2*gp_layers + 1)* & - & (n + 2*gp_layers + 1)* & - & (p + 2*gp_layers + 1)/ & - & (min(m, n, p) + 2*gp_layers + 1))) - else - @:ALLOCATE(ib_buff_send(0:-1 + gp_layers* & - & (max(m, n) + 2*gp_layers + 1))) - end if - else - @:ALLOCATE(ib_buff_send(0:-1 + gp_layers)) - end if - @:ALLOCATE(ib_buff_recv(0:ubound(ib_buff_send, 1))) - - !nCalls_time = nCalls_time + 1 - - ! MPI Communication in x-direction - if (bc_x%beg >= 0) then ! PBC at the beginning - - if (bc_x%end >= 0) then ! PBC at the beginning and end - - ! Packing buffer to be sent to bc_x%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = m - gp_layers + 1, m - r = ((j - m - 1) + gp_layers*((k + 1) + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv + integer, intent(in) :: mpi_dir, pbc_loc - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) + integer :: i, j, k, l, r, q !< Generic loop iterators - ! 
Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 0, & - p_i_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + integer :: buffer_counts(1:3), buffer_count - !$acc end host_data - !$acc end data - !$acc wait - else -#endif + type(int_bounds_info) :: boundary_conditions(1:3) + integer :: beg_end(1:2), grid_dims(1:3) + integer :: dst_proc, src_proc, recv_tag, send_tag - !$acc update host(ib_buff_send, ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 0, & - ib_buff_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + logical :: beg_end_geq_0, qbmm_comm -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the beginning only - - ! Packing buffer to be sent to bc_x%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, gp_layers - 1 - r = (j + gp_layers*(k + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do + integer :: pack_offset, unpack_offset - !call MPI_Barrier(MPI_COMM_WORLD, ierr) + integer, pointer :: p_i_send, p_i_recv -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv +#ifdef MFC_MPI - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) + call nvtxStartRange("IB-MARKER-COMM-PACKBUF") - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 1, & - p_i_recv(0), & + buffer_counts = (/ & gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + gp_layers*(m + 2*gp_layers + 1)*(p + 1), & + gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1) & + /) - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - !$acc update host(ib_buff_send) + buffer_count = buffer_counts(mpi_dir) + boundary_conditions = (/bc_x, bc_y, bc_z/) + beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) + beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 1, & - ib_buff_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + ! Implements: + ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc + ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] + ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] + ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] + ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] -#if defined(MFC_OpenACC) - end if -#endif - - end if + send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) + recv_tag = f_logical_to_int(pbc_loc == 1) -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) - end if -#endif + dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) + src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) - ! 
Unpacking buffer received from bc_x%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -gp_layers, -1 - r = (j + gp_layers*((k + 1) + (n + 1)*l)) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do + grid_dims = (/m, n, p/) + pack_offset = 0 + if (f_xor(pbc_loc == 1, beg_end_geq_0)) then + pack_offset = grid_dims(mpi_dir) - gp_layers + 1 end if - if (bc_x%end >= 0) then ! PBC at the end - - if (bc_x%beg >= 0) then ! PBC at the end and beginning - - !$acc parallel loop collapse(3) gang vector default(present) private(r) - ! Packing buffer to be sent to bc_x%beg - do l = 0, p - do k = 0, n - do j = 0, gp_layers - 1 - r = (j + gp_layers*(k + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 1, & - p_i_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 1, & - ib_buff_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the end only - - ! Packing buffer to be sent to bc_x%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = m - gp_layers + 1, m - r = ((j - m - 1) + gp_layers*((k + 1) + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 0, & - p_i_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 0, & - ib_buff_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - end if - - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) - end if - - ! Unpacking buffer received from bc_x%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = m + 1, m + gp_layers - r = ((j - m - 1) + gp_layers*(k + (n + 1)*l)) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do - + unpack_offset = 0 + if (pbc_loc == 1) then + unpack_offset = grid_dims(mpi_dir) + gp_layers + 1 end if - ! END: MPI Communication in x-direction - - ! MPI Communication in y-direction - - if (bc_y%beg >= 0) then ! 
PBC at the beginning - - if (bc_y%end >= 0) then ! PBC at the beginning and end - - ! Packing buffer to be sent to bc_y%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = n - gp_layers + 1, n - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k - n + gp_layers - 1) + gp_layers*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 0, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 0, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the beginning only - - ! Packing buffer to be sent to bc_y%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, gp_layers - 1 - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - (k + gp_layers*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) + ! Pack Buffer to Send + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = 0, gp_layers - 1 + r = (j + gp_layers*(k + (n + 1)*l)) + ib_buff_send(r) = ib_markers%sf(j + pack_offset, k, l) + end do end do end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 1, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 1, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - end if - -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) - end if -#endif - - ! 
Unpacking buffer received from bc_y%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = -gp_layers, -1 - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + gp_layers*l)) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do - - end if - - if (bc_y%end >= 0) then ! PBC at the end - - if (bc_y%beg >= 0) then ! PBC at the end and beginning - - ! Packing buffer to be sent to bc_y%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, gp_layers - 1 - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - (k + gp_layers*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) + #:elif mpi_dir == 2 + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, p + do k = 0, gp_layers - 1 + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & + (k + gp_layers*l)) + ib_buff_send(r) = ib_markers%sf(j, k + pack_offset, l) + end do end do end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 1, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 1, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the end only - - ! Packing buffer to be sent to bc_y%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = n - gp_layers + 1, n - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k - n + gp_layers - 1) + gp_layers*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) + #:else + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, gp_layers - 1 + do k = -gp_layers, n + gp_layers + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & + ((k + gp_layers) + (n + 2*gp_layers + 1)*l)) + ib_buff_send(r) = ib_markers%sf(j, k, l + pack_offset) + end do end do end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 0, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! 
Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 0, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - + #:endif end if - -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) - end if -#endif - - ! Unpacking buffer received form bc_y%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = n + 1, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k - n - 1) + gp_layers*l)) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do - - end if - ! END: MPI Communication in y-direction - - ! MPI Communication in z-direction - if (bc_z%beg >= 0) then ! PBC at the beginning - - if (bc_z%end >= 0) then ! PBC at the beginning and end - - ! Packing buffer to be sent to bc_z%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = p - gp_layers + 1, p - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l - p + gp_layers - 1))) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 0, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 0, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the beginning only - - ! Packing buffer to be sent to bc_z%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, gp_layers - 1 - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) + #:endfor + call nvtxEndRange ! Packbuf + + call nvtxStartRange("IB-MARKER-SENDRECV") + call MPI_SENDRECV( & + ib_buff_send, buffer_count, MPI_INTEGER, dst_proc, send_tag, & + ib_buff_recv, buffer_count, MPI_INTEGER, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA + + ! 
Unpack Received Buffer + call nvtxStartRange("IB-MARKER-COMM-UNPACKBUF") + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = -gp_layers, -1 + r = (j + gp_layers*((k + 1) + (n + 1)*l)) + ib_markers%sf(j + unpack_offset, k, l) = ib_buff_recv(r) + end do end do end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 1, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 1, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - end if - -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) - end if -#endif - - ! Unpacking buffer from bc_z%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = -gp_layers, -1 - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l + gp_layers))) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do - - end if - - if (bc_z%end >= 0) then ! PBC at the end - - if (bc_z%beg >= 0) then ! PBC at the end and beginning - - ! Packing buffer to be sent to bc_z%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, gp_layers - 1 - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) + #:elif mpi_dir == 2 + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, p + do k = -gp_layers, -1 + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & + ((k + gp_layers) + gp_layers*l)) + ib_markers%sf(j, k + unpack_offset, l) = ib_buff_recv(r) + end do end do end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 1, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - !$acc update host(ib_buff_send) - - ! 
Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 1, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the end only - - ! Packing buffer to be sent to bc_z%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = p - gp_layers + 1, p - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l - p + gp_layers - 1))) - ib_buff_send(r) = ib_markers%sf(j, k, l) + #:else + ! Unpacking buffer from bc_z%beg + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = -gp_layers, -1 + do k = -gp_layers, n + gp_layers + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & + ((k + gp_layers) + (n + 2*gp_layers + 1)* & + (l + gp_layers))) + ib_markers%sf(j, k, l + unpack_offset) = ib_buff_recv(r) + end do end do end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 0, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 0, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - + #:endif end if - -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) - end if -#endif - - ! Unpacking buffer received from bc_z%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = p + 1, p + gp_layers - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l - p - 1))) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do - - end if - - ! 
END: MPI Communication in z-direction - + #:endfor + call nvtxEndRange #endif end subroutine s_mpi_sendrecv_ib_buffers @@ -1613,9 +391,21 @@ contains impure subroutine s_mpi_send_random_number(phi_rn, num_freq) integer, intent(in) :: num_freq real(wp), intent(inout), dimension(1:num_freq) :: phi_rn + #ifdef MFC_MPI call MPI_BCAST(phi_rn, num_freq, mpi_p, 0, MPI_COMM_WORLD, ierr) #endif + end subroutine s_mpi_send_random_number + subroutine s_finalize_mpi_proxy_module() + +#ifdef MFC_MPI + if (ib) then + @:DEALLOCATE(ib_buff_send, ib_buff_recv) + end if +#endif + + end subroutine s_finalize_mpi_proxy_module + end module m_mpi_proxy diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 44aa3e604f..93bcd55113 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -649,7 +649,7 @@ contains call nvtxEndRange call nvtxStartRange("RHS-COMMUNICATION") - call s_populate_variables_buffers(q_prim_qp%vf, pb, mv, bc_type) + call s_populate_variables_buffers(bc_type, q_prim_qp%vf, pb, mv) call nvtxEndRange call nvtxStartRange("RHS-ELASTIC") diff --git a/src/simulation/m_sim_helpers.f90 b/src/simulation/m_sim_helpers.f90 index a05ba92bf1..5f80d9c0a2 100644 --- a/src/simulation/m_sim_helpers.f90 +++ b/src/simulation/m_sim_helpers.f90 @@ -10,8 +10,7 @@ module m_sim_helpers private; public :: s_compute_enthalpy, & s_compute_stability_from_dt, & - s_compute_dt_from_cfl, & - s_assign_default_bc_type + s_compute_dt_from_cfl contains @@ -268,26 +267,4 @@ pure subroutine s_compute_dt_from_cfl(vel, c, max_dt, rho, Re_l, j, k, l) end subroutine s_compute_dt_from_cfl - pure subroutine s_assign_default_bc_type(bc_type) - - type(integer_field), dimension(1:num_dims, -1:1), intent(inout) :: bc_type - - bc_type(1, -1)%sf(:, :, :) = bc_x%beg - bc_type(1, 1)%sf(:, :, :) = bc_x%end - !$acc update device(bc_type(1,-1)%sf, bc_type(1,1)%sf) - - if (n > 0) then - bc_type(2, -1)%sf(:, :, :) = bc_y%beg - bc_type(2, 1)%sf(:, :, :) = bc_y%end - !$acc update device(bc_type(2,-1)%sf, bc_type(2,1)%sf) - - if (p > 0) then - bc_type(3, -1)%sf(:, :, :) = bc_z%beg - bc_type(3, 1)%sf(:, :, :) = bc_z%end - !$acc update device(bc_type(3,-1)%sf, bc_type(3,1)%sf) - end if - end if - - end subroutine s_assign_default_bc_type - end module m_sim_helpers diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index dbfda5f83b..330cb75712 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -96,7 +96,6 @@ module m_start_up s_read_data_files, & s_read_serial_data_files, & s_read_parallel_data_files, & - s_populate_grid_variables_buffers, & s_initialize_internal_energy_equations, & s_initialize_modules, s_initialize_gpu_vars, & s_initialize_mpi_domain, s_finalize_modules, & @@ -959,219 +958,7 @@ contains end subroutine s_read_parallel_data_files - !> The purpose of this subroutine is to populate the buffers - !! of the grid variables, which are constituted of the cell- - !! boundary locations and cell-width distributions, based on - !! the boundary conditions. - impure subroutine s_populate_grid_variables_buffers - - integer :: i !< Generic loop iterator - - ! Population of Buffers in x-direction - - ! Populating cell-width distribution buffer, at the beginning of the - ! coordinate direction, based on the selected boundary condition. In - ! order, these are the ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. 
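The refactored s_mpi_sendrecv_ib_buffers above collapses the four per-face branches (begin/end face, with or without a processor boundary on the opposite face) into a single packed MPI_SENDRECV whose tags and neighbour ranks are computed from pbc_loc and beg_end_geq_0. The short, self-contained sketch below is not MFC source; its two helpers stand in for the codebase's f_logical_to_int and f_xor, and it simply enumerates the four cases to reproduce the table given in the inline comment, which may help when reviewing the tag/neighbour selection.

program demo_ib_tags
   ! Illustrative sketch (not MFC source): enumerate the four cases handled by the
   ! refactored s_mpi_sendrecv_ib_buffers and show that the xor-based tag and
   ! neighbour selection reproduces the table in the inline comment. The two
   ! helpers below stand in for the codebase's f_logical_to_int and f_xor.
   implicit none
   integer :: pbc_loc, iopp, send_tag, recv_tag, dst_idx, src_idx
   logical :: beg_end_geq_0
   character(len=3), parameter :: face(1:2) = ['beg', 'end']

   do pbc_loc = -1, 1, 2
      do iopp = 0, 1
         ! beg_end_geq_0: is the face opposite pbc_loc a processor boundary?
         beg_end_geq_0 = (iopp == 1)

         send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1))
         recv_tag = f_logical_to_int(pbc_loc == 1)
         dst_idx = 1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))
         src_idx = 1 + f_logical_to_int(pbc_loc == 1)

         print '(a,i3,a,l2,a,i2,i2,a,a,1x,a)', ' pbc_loc=', pbc_loc, &
            ' opposite_is_proc=', beg_end_geq_0, ' tags=[', send_tag, recv_tag, &
            ' ] dst/src= ', face(dst_idx), face(src_idx)
      end do
   end do

contains

   pure integer function f_logical_to_int(cond) result(i)
      logical, intent(in) :: cond
      i = merge(1, 0, cond)
   end function f_logical_to_int

   pure logical function f_xor(a, b) result(r)
      logical, intent(in) :: a, b
      r = (a .neqv. b)
   end function f_xor

end program demo_ib_tags

Here beg_end_geq_0 corresponds to the face opposite pbc_loc being a processor boundary, which is what the index expression max(pbc_loc, 0) - pbc_loc + 1 selects from beg_end; the four printed rows match the send/recv pairs of the deleted per-face MPI_SENDRECV calls.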
- if (bc_x%beg <= BC_GHOST_EXTRAP) then - do i = 1, buff_size - dx(-i) = dx(0) - end do - elseif (bc_x%beg == BC_REFLECTIVE) then - do i = 1, buff_size - dx(-i) = dx(i - 1) - end do - elseif (bc_x%beg == BC_PERIODIC) then - do i = 1, buff_size - dx(-i) = dx(m - (i - 1)) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(1, -1) - end if - - ! Computing the cell-boundary locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - x_cb(-1 - i) = x_cb(-i) - dx(-i) - end do - ! Computing the cell-center locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - x_cc(-i) = x_cc(1 - i) - (dx(1 - i) + dx(-i))/2._wp - end do - - ! Populating the cell-width distribution buffer, at the end of the - ! coordinate direction, based on desired boundary condition. These - ! include, in order, ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. - if (bc_x%end <= BC_GHOST_EXTRAP) then - do i = 1, buff_size - dx(m + i) = dx(m) - end do - elseif (bc_x%end == BC_REFLECTIVE) then - do i = 1, buff_size - dx(m + i) = dx(m - (i - 1)) - end do - elseif (bc_x%end == BC_PERIODIC) then - do i = 1, buff_size - dx(m + i) = dx(i - 1) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(1, 1) - end if - - ! Populating the cell-boundary locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - x_cb(m + i) = x_cb(m + (i - 1)) + dx(m + i) - end do - ! Populating the cell-center locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - x_cc(m + i) = x_cc(m + (i - 1)) + (dx(m + (i - 1)) + dx(m + i))/2._wp - end do - - ! END: Population of Buffers in x-direction - - ! Population of Buffers in y-direction - - ! Populating cell-width distribution buffer, at the beginning of the - ! coordinate direction, based on the selected boundary condition. In - ! order, these are the ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. - if (n == 0) then - return - elseif (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_AXIS) then - do i = 1, buff_size - dy(-i) = dy(0) - end do - elseif (bc_y%beg == BC_REFLECTIVE .or. bc_y%beg == BC_AXIS) then - do i = 1, buff_size - dy(-i) = dy(i - 1) - end do - elseif (bc_y%beg == BC_PERIODIC) then - do i = 1, buff_size - dy(-i) = dy(n - (i - 1)) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(2, -1) - end if - - ! Computing the cell-boundary locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - y_cb(-1 - i) = y_cb(-i) - dy(-i) - end do - ! Computing the cell-center locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - y_cc(-i) = y_cc(1 - i) - (dy(1 - i) + dy(-i))/2._wp - end do - - ! Populating the cell-width distribution buffer, at the end of the - ! coordinate direction, based on desired boundary condition. These - ! include, in order, ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. 
- if (bc_y%end <= BC_GHOST_EXTRAP) then - do i = 1, buff_size - dy(n + i) = dy(n) - end do - elseif (bc_y%end == BC_REFLECTIVE) then - do i = 1, buff_size - dy(n + i) = dy(n - (i - 1)) - end do - elseif (bc_y%end == BC_PERIODIC) then - do i = 1, buff_size - dy(n + i) = dy(i - 1) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(2, 1) - end if - - ! Populating the cell-boundary locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - y_cb(n + i) = y_cb(n + (i - 1)) + dy(n + i) - end do - ! Populating the cell-center locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - y_cc(n + i) = y_cc(n + (i - 1)) + (dy(n + (i - 1)) + dy(n + i))/2._wp - end do - - ! END: Population of Buffers in y-direction - - ! Population of Buffers in z-direction - - ! Populating cell-width distribution buffer, at the beginning of the - ! coordinate direction, based on the selected boundary condition. In - ! order, these are the ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. - if (p == 0) then - return - elseif (bc_z%beg <= BC_GHOST_EXTRAP) then - do i = 1, buff_size - dz(-i) = dz(0) - end do - elseif (bc_z%beg == BC_REFLECTIVE) then - do i = 1, buff_size - dz(-i) = dz(i - 1) - end do - elseif (bc_z%beg == BC_PERIODIC) then - do i = 1, buff_size - dz(-i) = dz(p - (i - 1)) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(3, -1) - end if - - ! Computing the cell-boundary locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - z_cb(-1 - i) = z_cb(-i) - dz(-i) - end do - ! Computing the cell-center locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - z_cc(-i) = z_cc(1 - i) - (dz(1 - i) + dz(-i))/2._wp - end do - - ! Populating the cell-width distribution buffer, at the end of the - ! coordinate direction, based on desired boundary condition. These - ! include, in order, ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. - if (bc_z%end <= BC_GHOST_EXTRAP) then - do i = 1, buff_size - dz(p + i) = dz(p) - end do - elseif (bc_z%end == BC_REFLECTIVE) then - do i = 1, buff_size - dz(p + i) = dz(p - (i - 1)) - end do - elseif (bc_z%end == BC_PERIODIC) then - do i = 1, buff_size - dz(p + i) = dz(i - 1) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(3, 1) - end if - - ! Populating the cell-boundary locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - z_cb(p + i) = z_cb(p + (i - 1)) + dz(p + i) - end do - ! Populating the cell-center locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - z_cc(p + i) = z_cc(p + (i - 1)) + (dz(p + (i - 1)) + dz(p + i))/2._wp - end do - - ! END: Population of Buffers in z-direction - - end subroutine s_populate_grid_variables_buffers - - !> The purpose of this procedure is to initialize the + !> The purpose of this procedure is to initialize the !! values of the internal-energy equations of each phase !! from the mass of each phase, the mixture momentum and !! mixture-total-energy equations. 
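The routine removed from m_start_up.fpp above fills the grid-variable buffers in two steps per direction and face: it first copies cell widths into the buffer according to the selected boundary condition (ghost-cell extrapolation, reflective, periodic, or an MPI exchange for processor boundaries), then derives the cell-boundary and cell-centre locations from those widths. A minimal single-direction sketch of that pattern follows; it is illustrative only (local names m_loc, buff and a string-valued bc_beg instead of MFC's module globals and negative BC_* codes), and it omits the s_mpi_sendrecv_grid_variables_buffers fallback used for processor boundaries.

program demo_grid_buffers
   ! Illustrative sketch (not MFC source): populate the cell-width buffer of one
   ! coordinate direction for the ghost-cell-extrapolation, reflective (symmetry)
   ! and periodic cases, then derive the cell-boundary and cell-centre buffers.
   implicit none
   integer, parameter :: wp = selected_real_kind(15)
   integer, parameter :: m_loc = 7, buff = 3          ! hypothetical local sizes
   real(wp) :: dx(-buff:m_loc + buff)                 ! cell widths incl. buffers
   real(wp) :: x_cb(-1 - buff:m_loc + buff)           ! cell-boundary locations
   real(wp) :: x_cc(-buff:m_loc + buff)               ! cell-centre locations
   character(len=12) :: bc_beg
   integer :: i

   dx(0:m_loc) = 0.1_wp                               ! uniform interior grid
   x_cb(-1) = 0._wp
   do i = 0, m_loc
      x_cb(i) = x_cb(i - 1) + dx(i)
      x_cc(i) = x_cb(i - 1) + 0.5_wp*dx(i)
   end do

   bc_beg = 'periodic'                                ! 'extrap' | 'reflective' | 'periodic'

   ! Populate the cell-width buffer at the beginning of the direction,
   ! mirroring the three non-MPI branches of the deleted routine.
   select case (trim(bc_beg))
   case ('extrap')                                    ! ghost-cell extrapolation
      dx(-buff:-1) = dx(0)
   case ('reflective')                                ! symmetry about x_cb(-1)
      do i = 1, buff
         dx(-i) = dx(i - 1)
      end do
   case ('periodic')                                  ! wrap around the domain
      do i = 1, buff
         dx(-i) = dx(m_loc - (i - 1))
      end do
   end select

   ! Cell-boundary and cell-centre buffers follow directly from the widths,
   ! exactly as in the deleted loops.
   do i = 1, buff
      x_cb(-1 - i) = x_cb(-i) - dx(-i)
      x_cc(-i) = x_cc(1 - i) - (dx(1 - i) + dx(-i))/2._wp
   end do

   print *, 'buffer cell widths:     ', dx(-buff:-1)
   print *, 'buffer cell boundaries: ', x_cb(-1 - buff:-2)
   print *, 'buffer cell centres:    ', x_cc(-buff:-1)
end program demo_grid_buffers

In the routine itself the same branches are selected by comparing bc_<dir>%beg and bc_<dir>%end against BC_GHOST_EXTRAP, BC_REFLECTIVE and BC_PERIODIC, the y-direction additionally treats BC_AXIS like a reflective fill, and any remaining case falls through to the MPI grid-buffer exchange.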
@@ -1475,6 +1262,7 @@ contains call s_initialize_mpi_common_module() + call s_initialize_mpi_proxy_module() call s_initialize_variables_conversion_module() if (grid_geometry == 3) call s_initialize_fftw_module() call s_initialize_riemann_solvers_module() @@ -1681,6 +1469,7 @@ contains if (viscous) then call s_finalize_viscous_module() end if + call s_finalize_mpi_proxy_module() if (surface_tension) call s_finalize_surface_tension_module() if (bodyForces) call s_finalize_body_forces_module() diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 9a7f64775d..b1c338b5c9 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -34,8 +34,6 @@ module m_time_steppers use m_boundary_common - use m_boundary_conditions - use m_helper use m_sim_helpers @@ -941,7 +939,7 @@ contains elseif (bubbles_lagrange) then - call s_populate_variables_buffers(q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf, bc_type) + call s_populate_variables_buffers(bc_type, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf) call s_compute_bubble_EL_dynamics(q_prim_vf, stage) call s_transfer_data_to_tmp() call s_smear_voidfraction() diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index fc73ed0743..34cdc451fd 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -1392,7 +1392,7 @@ contains end if end if - if (bc_x%beg <= BC_GHOST_EXTRAPOLATION) then + if (bc_x%beg <= BC_GHOST_EXTRAP) then !$acc parallel loop collapse(2) gang vector default(present) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end @@ -1401,7 +1401,7 @@ contains end do end do end if - if (bc_x%end <= BC_GHOST_EXTRAPOLATION) then + if (bc_x%end <= BC_GHOST_EXTRAP) then !$acc parallel loop collapse(2) gang vector default(present) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end @@ -1411,7 +1411,7 @@ contains end do end if if (n > 0) then - if (bc_y%beg <= BC_GHOST_EXTRAPOLATION .and. bc_y%beg /= BC_NULL) then + if (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_NULL) then !$acc parallel loop collapse(2) gang vector default(present) do l = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(1)%beg, idwbuff(1)%end @@ -1420,7 +1420,7 @@ contains end do end do end if - if (bc_y%end <= BC_GHOST_EXTRAPOLATION) then + if (bc_y%end <= BC_GHOST_EXTRAP) then !$acc parallel loop collapse(2) gang vector default(present) do l = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(1)%beg, idwbuff(1)%end @@ -1430,7 +1430,7 @@ contains end do end if if (p > 0) then - if (bc_z%beg <= BC_GHOST_EXTRAPOLATION) then + if (bc_z%beg <= BC_GHOST_EXTRAP) then !$acc parallel loop collapse(2) gang vector default(present) do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end @@ -1440,7 +1440,7 @@ contains end do end do end if - if (bc_z%end <= BC_GHOST_EXTRAPOLATION) then + if (bc_z%end <= BC_GHOST_EXTRAP) then !$acc parallel loop collapse(2) gang vector default(present) do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 015ea87a3f..331479a5cb 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -272,12 +272,12 @@ contains ! any contributions from outside of the physical domain during ! 
the WENO reconstruction if (null_weights) then - if (bc_s%beg == BC_RIEMANN_EXTRAPOLATION) then + if (bc_s%beg == BC_RIEMANN_EXTRAP) then d_cbR_${XYZ}$ (1, 0) = 0._wp; d_cbR_${XYZ}$ (0, 0) = 1._wp d_cbL_${XYZ}$ (1, 0) = 0._wp; d_cbL_${XYZ}$ (0, 0) = 1._wp end if - if (bc_s%end == BC_RIEMANN_EXTRAPOLATION) then + if (bc_s%end == BC_RIEMANN_EXTRAP) then d_cbR_${XYZ}$ (0, s) = 0._wp; d_cbR_${XYZ}$ (1, s) = 1._wp d_cbL_${XYZ}$ (0, s) = 0._wp; d_cbL_${XYZ}$ (1, s) = 1._wp end if @@ -418,14 +418,14 @@ contains ! any contributions from outside of the physical domain during ! the WENO reconstruction if (null_weights) then - if (bc_s%beg == BC_RIEMANN_EXTRAPOLATION) then + if (bc_s%beg == BC_RIEMANN_EXTRAP) then d_cbR_${XYZ}$ (1:2, 0) = 0._wp; d_cbR_${XYZ}$ (0, 0) = 1._wp d_cbL_${XYZ}$ (1:2, 0) = 0._wp; d_cbL_${XYZ}$ (0, 0) = 1._wp d_cbR_${XYZ}$ (2, 1) = 0._wp; d_cbR_${XYZ}$ (:, 1) = d_cbR_${XYZ}$ (:, 1)/sum(d_cbR_${XYZ}$ (:, 1)) d_cbL_${XYZ}$ (2, 1) = 0._wp; d_cbL_${XYZ}$ (:, 1) = d_cbL_${XYZ}$ (:, 1)/sum(d_cbL_${XYZ}$ (:, 1)) end if - if (bc_s%end == BC_RIEMANN_EXTRAPOLATION) then + if (bc_s%end == BC_RIEMANN_EXTRAP) then d_cbR_${XYZ}$ (0, s - 1) = 0._wp; d_cbR_${XYZ}$ (:, s - 1) = d_cbR_${XYZ}$ (:, s - 1)/sum(d_cbR_${XYZ}$ (:, s - 1)) d_cbL_${XYZ}$ (0, s - 1) = 0._wp; d_cbL_${XYZ}$ (:, s - 1) = d_cbL_${XYZ}$ (:, s - 1)/sum(d_cbL_${XYZ}$ (:, s - 1)) d_cbR_${XYZ}$ (0:1, s) = 0._wp; d_cbR_${XYZ}$ (2, s) = 1._wp diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index 95714f8d5e..e50858b3b4 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -60,6 +60,7 @@ def analytic(self): 'Bx0': ParamType.REAL, 'relativity': ParamType.LOG, 'cont_damage': ParamType.LOG, + 'num_bc_patches': ParamType.INT, } PRE_PROCESS = COMMON.copy() @@ -97,7 +98,6 @@ def analytic(self): 'surface_tension': ParamType.LOG, 'elliptic_smoothing': ParamType.LOG, 'elliptic_smoothing_iters': ParamType.INT, - 'num_bc_patches': ParamType.INT, 'viscous': ParamType.LOG, 'bubbles_lagrange': ParamType.LOG, })
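In the m_weno.fpp hunks above, the null_weights option zeroes the d_cbL/d_cbR coefficients of candidate stencils that would draw on cells outside the physical domain at a BC_RIEMANN_EXTRAP end and renormalizes what remains (collapsing onto the single interior stencil at the boundary cell itself). The sketch below reproduces that renormalization step in isolation; it is a standalone illustration with example coefficient values, not MFC's d_cbL/d_cbR tables.

program demo_null_weights
   ! Illustrative sketch (not MFC source): zero the coefficients of candidate
   ! stencils that reach outside the physical domain and renormalize the rest,
   ! mirroring the null_weights branches of the m_weno.fpp hunks.
   implicit none
   integer, parameter :: wp = selected_real_kind(15)
   real(wp) :: d(0:2)

   ! Example coefficients for three candidate stencils (values are illustrative;
   ! the real ones come from MFC's d_cbL/d_cbR tables).
   d = [1._wp/10._wp, 6._wp/10._wp, 3._wp/10._wp]

   ! One cell away from the left end: only stencil 2 reaches outside, so drop it
   ! and renormalize, mirroring d_cbR(2, 1) = 0 followed by the division by sum().
   call s_null_and_renormalize(d, keep_up_to=1)
   print *, 'renormalized coefficients:', d

contains

   subroutine s_null_and_renormalize(d, keep_up_to)
      real(wp), intent(inout) :: d(0:)
      integer, intent(in) :: keep_up_to   ! highest stencil index kept
      d(keep_up_to + 1:) = 0._wp
      d = d/sum(d)
   end subroutine s_null_and_renormalize

end program demo_null_weights

Calling the helper with keep_up_to = 0 reproduces the boundary-cell case from the diff (all weight on the single interior stencil), while keep_up_to = 1 gives the renormalized pair used one cell in; the right end mirrors this with the stencil indices reversed.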