diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index e836d5696c..1a89318cc5 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -97,7 +97,7 @@ steps:
           - "🦢 shallow_water"
           - "🐳 simulation"
           - "🍂 lagrangian_particles"
-          - "🧅 multi_region"
+          # - "🧅 multi_region"
           - "🦧 scripts"
           - "👺 enzyme"
           - "🍱 xesmf"
diff --git a/Project.toml b/Project.toml
index 90404515b1..b51ee3d2f2 100755
--- a/Project.toml
+++ b/Project.toml
@@ -10,6 +10,7 @@ CubedSphere = "7445602f-e544-4518-8976-18f8e8ae6cdb"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
+DoubleFloats = "497a8b3b-efae-58df-a0af-a86822472b78"
 FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
 GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
 GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
diff --git a/ext/OceananigansNCDatasetsExt.jl b/ext/OceananigansNCDatasetsExt.jl
index 156c3b17cb..a0e83f222a 100644
--- a/ext/OceananigansNCDatasetsExt.jl
+++ b/ext/OceananigansNCDatasetsExt.jl
@@ -7,9 +7,11 @@ using Printf: @sprintf
 using OrderedCollections: OrderedDict
 using SeawaterPolynomials: BoussinesqEquationOfState
 
+using Oceananigans: fields
 using Oceananigans: initialize!, prettytime, pretty_filesize, AbstractModel
 using Oceananigans.Architectures: CPU, GPU, on_architecture
 using Oceananigans.AbstractOperations: KernelFunctionOperation, AbstractOperation
+using Oceananigans.DistributedComputations: synchronize_communication!
 using Oceananigans.BuoyancyFormulations: BuoyancyForce, BuoyancyTracer, SeawaterBuoyancy, LinearEquationOfState
 using Oceananigans.Fields
 using Oceananigans.Fields: Reduction, reduced_dimensions, reduced_location, location, indices
@@ -1388,6 +1390,12 @@ Write output to netcdf file `output_writer.filepath` at specified intervals. Inc
 every time an output is written to the file.
 """
 function write_output!(ow::NetCDFWriter, model::AbstractModel)
+
+    # Synchronize model state if needed
+    for field in fields(model)
+        synchronize_communication!(field)
+    end
+
     # Start a new file if the file_splitting(model) is true
     ow.file_splitting(model) && start_next_file(model, ow)
     update_file_splitting_schedule!(ow.file_splitting, ow.filepath)
diff --git a/ext/OceananigansReactantExt/TimeSteppers.jl b/ext/OceananigansReactantExt/TimeSteppers.jl
index 635d52fd98..98dfe42ed1 100644
--- a/ext/OceananigansReactantExt/TimeSteppers.jl
+++ b/ext/OceananigansReactantExt/TimeSteppers.jl
@@ -12,8 +12,6 @@ using Oceananigans.Utils: @apply_regionally, apply_regionally!
 using Oceananigans.TimeSteppers:
     update_state!,
     tick!,
-    compute_pressure_correction!,
-    correct_velocities_and_cache_previous_tendencies!,
     step_lagrangian_particles!,
     QuasiAdamsBashforth2TimeStepper,
     compute_flux_bc_tendencies!
@@ -87,8 +85,7 @@ function time_step!(model::ReactantModel{<:QuasiAdamsBashforth2TimeStepper{FT}},
     ab2_timestepper.χ = χ
 
     # Full step for tracers, fractional step for velocities.
-    compute_flux_bc_tendencies!(model)
-    ab2_step!(model, Δt)
+    ab2_step!(model, Δt, callbacks)
 
     tick!(model.clock, Δt)
 
@@ -105,10 +102,7 @@ function time_step!(model::ReactantModel{<:QuasiAdamsBashforth2TimeStepper{FT}},
         model.clock.last_stage_Δt = Δt
     end
 
-    compute_pressure_correction!(model, Δt)
-    correct_velocities_and_cache_previous_tendencies!(model, Δt)
-
-    update_state!(model, callbacks; compute_tendencies=true)
+    update_state!(model, callbacks)
     step_lagrangian_particles!(model, Δt)
 
     # Return χ to initial value
diff --git a/src/Advection/weno_reconstruction.jl b/src/Advection/weno_reconstruction.jl
index ed126cdd84..0ad1422597 100644
--- a/src/Advection/weno_reconstruction.jl
+++ b/src/Advection/weno_reconstruction.jl
@@ -76,6 +76,7 @@ WENO{5, Float64, Float32}(order=9)
 """
 function WENO(FT::DataType=Oceananigans.defaults.FloatType, FT2::DataType=Float32;
               order = 5,
+              buffer_scheme = nothing,
               bounds = nothing)
 
     mod(order, 2) == 0 && throw(ArgumentError("WENO reconstruction scheme is defined only for odd orders"))
@@ -85,7 +86,10 @@ function WENO(FT::DataType=Oceananigans.defaults.FloatType, FT2::DataType=Float3
         return UpwindBiased(FT; order=1)
     else
         advecting_velocity_scheme = Centered(FT; order=order-1)
-        buffer_scheme = WENO(FT, FT2; order=order-2, bounds)
+        
+        if isnothing(buffer_scheme)
+            buffer_scheme = WENO(FT, FT2; order=order-2, bounds)
+        end
 
         N = Int((order + 1) ÷ 2)
         return WENO{N, FT, FT2}(bounds, buffer_scheme, advecting_velocity_scheme)
diff --git a/src/DistributedComputations/communication_buffers.jl b/src/DistributedComputations/communication_buffers.jl
index 4334c112c2..89088736dd 100644
--- a/src/DistributedComputations/communication_buffers.jl
+++ b/src/DistributedComputations/communication_buffers.jl
@@ -1,7 +1,7 @@
 using Oceananigans.BoundaryConditions
 using Oceananigans.BoundaryConditions: FieldBoundaryConditions, BoundaryCondition
 using Oceananigans.BoundaryConditions: MultiRegionCommunication, DistributedCommunication
-using Oceananigans.Grids: halo_size, size
+using Oceananigans.Grids: halo_size, size, topology
 using Oceananigans.Utils: launch!
 
 using Adapt
@@ -25,6 +25,26 @@ struct CommunicationBuffers{W, E, S, N, SW, SE, NW, NE}
    northeast :: NE
 end
 
+Adapt.adapt_structure(to, buff::CommunicationBuffers) =
+    CommunicationBuffers(Adapt.adapt(to, buff.west), 
+                         Adapt.adapt(to, buff.east),    
+                         Adapt.adapt(to, buff.north), 
+                         Adapt.adapt(to, buff.south), 
+                         Adapt.adapt(to, buff.southwest), 
+                         Adapt.adapt(to, buff.southeast), 
+                         Adapt.adapt(to, buff.northwest), 
+                         Adapt.adapt(to, buff.northeast))
+
+on_architecture(arch, buff::CommunicationBuffers) =
+    CommunicationBuffers(on_architecture(arch, buff.west), 
+                         on_architecture(arch, buff.east),    
+                         on_architecture(arch, buff.north), 
+                         on_architecture(arch, buff.south), 
+                         on_architecture(arch, buff.southwest), 
+                         on_architecture(arch, buff.southeast), 
+                         on_architecture(arch, buff.northwest), 
+                         on_architecture(arch, buff.northeast))
+
 communication_buffers(grid::DistributedGrid, data, boundary_conditions) = CommunicationBuffers(grid, data, boundary_conditions)
 
 function CommunicationBuffers(grid, data, boundary_conditions::FieldBoundaryConditions)
@@ -50,67 +70,77 @@ CommunicationBuffers(grid, data, ::Nothing) = nothing
 
 # OneDBuffers are associated with partitioning without corner passing,
 # therefore the "corner zones" are communicated within the one-dimensional pass.
-const OneDBuffers = CommunicationBuffers{<:Any, <:Any, <:Any, <:Any, <:Nothing, <:Nothing, <:Nothing, <:Nothing}
-
-x_communication_buffer(arch, grid, data, H, bc) = nothing
-y_communication_buffer(arch, grid, data, H, bc) = nothing
+struct OneDBuffer{B}
+    send :: B
+    recv :: B
+end
 
-# Only used for `Distributed` architectures
-corner_communication_buffer(arch, grid, data, Hx, Hy, edge1, edge2) = nothing
+# We never need to access buffers on the GPU!
+Adapt.adapt_structure(to, buff::OneDBuffer) = nothing
 
-# Disambiguation
-corner_communication_buffer(::Distributed, grid, data, Hx, Hy, ::Nothing, edge2) = nothing
-corner_communication_buffer(::Distributed, grid, data, Hx, Hy, edge1, ::Nothing) = nothing
-corner_communication_buffer(::Distributed, grid, data, Hx, Hy, ::Nothing, ::Nothing) = nothing
+####
+#### X and Y communication buffers
+####
 
-function corner_communication_buffer(arch::Distributed, grid, data, Hx, Hy, edge1, edge2)
-    return (send = on_architecture(arch, zeros(eltype(data), Hx, Hy, size(parent(data), 3))),
-            recv = on_architecture(arch, zeros(eltype(data), Hx, Hy, size(parent(data), 3))))
-end
+# Fallback
+x_communication_buffer(arch, grid, data, H, bc) = nothing
+y_communication_buffer(arch, grid, data, H, bc) = nothing
 
-function x_communication_buffer(arch::Distributed, grid, data, H, ::DCBC)
-    # Either we pass corners or it is a 1D parallelization in x
-    size_y = arch.ranks[2] == 1 ? size(parent(data), 2) : size(grid, 2)
-    return (send = on_architecture(arch, zeros(eltype(data), H, size_y, size(parent(data), 3))),
-            recv = on_architecture(arch, zeros(eltype(data), H, size_y, size(parent(data), 3))))
+# Either we pass corners or it is a 1D parallelization in x, note that, even for 2D parallelizations, we need a 
+# buffer that includes the corners (i.e. a OneDBuffer) if we are at the edge of the domain in y since we are not
+# passing all the corners in that case
+function x_communication_buffer(arch::Distributed, grid::AbstractGrid{FT, TX, TY}, data, H, ::DCBC) where {FT, TX, TY}
+    if (arch.ranks[2] == 1) || (TY == RightConnected) || (TY == LeftConnected)
+        return OneDBuffer(on_architecture(arch, zeros(eltype(data), H, size(parent(data), 2), size(parent(data), 3))),
+                          on_architecture(arch, zeros(eltype(data), H, size(parent(data), 2), size(parent(data), 3))))
+    else
+        return (send = on_architecture(arch, zeros(eltype(data), H, size(grid, 2), size(parent(data), 3))),
+                recv = on_architecture(arch, zeros(eltype(data), H, size(grid, 2), size(parent(data), 3))))
+    end
 end
 
-function y_communication_buffer(arch::Distributed, grid, data, H, ::DCBC)
+# Either we pass corners or it is a 1D parallelization in y. Note that, even for 2D parallelizations, we need a 
+# buffer that includes the corners (i.e. a OneDBuffer) if we are at the edge of the domain in x since we are not
+# passing all the corners in that case
+function y_communication_buffer(arch::Distributed, grid::AbstractGrid{FT, TX, TY}, data, H, ::DCBC) where {FT, TX, TY}
     # Either we pass corners or it is a 1D parallelization in y
-    size_x = arch.ranks[1] == 1 ? size(parent(data), 1) : size(grid, 1)
-    return (send = on_architecture(arch, zeros(eltype(data), size_x, H, size(parent(data), 3))),
-            recv = on_architecture(arch, zeros(eltype(data), size_x, H, size(parent(data), 3))))
+    if (arch.ranks[1] == 1) || (TX == RightConnected) || (TX == LeftConnected)
+        return OneDBuffer(on_architecture(arch, zeros(FT, size(parent(data), 1), H, size(parent(data), 3))),
+                          on_architecture(arch, zeros(FT, size(parent(data), 1), H, size(parent(data), 3))))
+    else
+        return (send = on_architecture(arch, zeros(FT, size(grid, 1), H, size(parent(data), 3))),
+                recv = on_architecture(arch, zeros(FT, size(grid, 1), H, size(parent(data), 3))))
+    end
 end
 
-x_communication_buffer(arch, grid, data, H, ::MCBC) =
-           (send = on_architecture(arch, zeros(eltype(data), H, size(parent(data), 2), size(parent(data), 3))),
-            recv = on_architecture(arch, zeros(eltype(data), H, size(parent(data), 2), size(parent(data), 3))))
+# Never pass corners in a MCBC.
+x_communication_buffer(arch, grid, data, H, ::MCBC) = 
+           OneDBuffer(on_architecture(arch, zeros(eltype(data), H, size(parent(data), 2), size(parent(data), 3))), 
+                      on_architecture(arch, zeros(eltype(data), H, size(parent(data), 2), size(parent(data), 3))))    
 
-y_communication_buffer(arch, grid, data, H, ::MCBC) =
-           (send = on_architecture(arch, zeros(eltype(data), size(parent(data), 1), H, size(parent(data), 3))),
-            recv = on_architecture(arch, zeros(eltype(data), size(parent(data), 1), H, size(parent(data), 3))))
+y_communication_buffer(arch, grid, data, H, ::MCBC) = 
+           OneDBuffer(on_architecture(arch, zeros(eltype(data), size(parent(data), 1), H, size(parent(data), 3))), 
+                      on_architecture(arch, zeros(eltype(data), size(parent(data), 1), H, size(parent(data), 3))))
 
-Adapt.adapt_structure(to, buff::CommunicationBuffers) =
-    CommunicationBuffers(Adapt.adapt(to, buff.west),
-                         Adapt.adapt(to, buff.east),
-                         Adapt.adapt(to, buff.north),
-                         Adapt.adapt(to, buff.south),
-                         Adapt.adapt(to, buff.southwest),
-                         Adapt.adapt(to, buff.southeast),
-                         Adapt.adapt(to, buff.northwest),
-                         Adapt.adapt(to, buff.northeast))
+#####
+##### Corner communication buffers
+#####
 
-on_architecture(arch, buff::CommunicationBuffers) =
-    CommunicationBuffers(on_architecture(arch, buff.west),
-                         on_architecture(arch, buff.east),
-                         on_architecture(arch, buff.north),
-                         on_architecture(arch, buff.south),
-                         on_architecture(arch, buff.southwest),
-                         on_architecture(arch, buff.southeast),
-                         on_architecture(arch, buff.northwest),
-                         on_architecture(arch, buff.northeast))
+# Only used for `Distributed` architectures
+corner_communication_buffer(arch, grid, data, Hx, Hy, xedge, yedge) = nothing
 
-fill_send_buffers!(c::OffsetArray, ::Nothing, grid) = nothing
+# Disambiguation
+corner_communication_buffer(::Distributed, grid, data, Hx, Hy, ::Nothing, ::Nothing) = nothing
+corner_communication_buffer(arch::Distributed, grid, data, Hx, Hy, ::Nothing, yedge) = nothing
+corner_communication_buffer(arch::Distributed, grid, data, Hx, Hy, xedge, ::Nothing) = nothing
+    
+# Do not need disctinction between one or two dimensional partitioning since they are 
+# uniquely defined and only used in the two-dimensional partitioning case, in all other cases
+# they are equal to `nothing`
+function corner_communication_buffer(arch::Distributed, grid, data, Hx, Hy, xedge, yedge)
+    return (send = on_architecture(arch, zeros(eltype(data), Hx, Hy, size(parent(data), 3))), 
+            recv = on_architecture(arch, zeros(eltype(data), Hx, Hy, size(parent(data), 3))))    
+end
 
 """
     fill_send_buffers!(c::OffsetArray, buffers::CommunicationBuffers, grid)
@@ -121,29 +151,30 @@ function fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid)
     Hx, Hy, _ = halo_size(grid)
     Nx, Ny, _ = size(grid)
 
-     _fill_west_send_buffer!(parent(c), buff, buff.west,  Hx, Hy, Nx, Ny)
-     _fill_east_send_buffer!(parent(c), buff, buff.east,  Hx, Hy, Nx, Ny)
-    _fill_south_send_buffer!(parent(c), buff, buff.south, Hx, Hy, Nx, Ny)
-    _fill_north_send_buffer!(parent(c), buff, buff.north, Hx, Hy, Nx, Ny)
+     _fill_west_send_buffer!(parent(c), buff.west,  Hx, Hy, Nx, Ny)
+     _fill_east_send_buffer!(parent(c), buff.east,  Hx, Hy, Nx, Ny)
+    _fill_south_send_buffer!(parent(c), buff.south, Hx, Hy, Nx, Ny)
+    _fill_north_send_buffer!(parent(c), buff.north, Hx, Hy, Nx, Ny)
 
-    _fill_southwest_send_buffer!(parent(c), buff, buff.southwest, Hx, Hy, Nx, Ny)
-    _fill_southeast_send_buffer!(parent(c), buff, buff.southeast, Hx, Hy, Nx, Ny)
-    _fill_northwest_send_buffer!(parent(c), buff, buff.northwest, Hx, Hy, Nx, Ny)
-    _fill_northeast_send_buffer!(parent(c), buff, buff.northeast, Hx, Hy, Nx, Ny)
+    _fill_southwest_send_buffer!(parent(c), buff.southwest, Hx, Hy, Nx, Ny)
+    _fill_southeast_send_buffer!(parent(c), buff.southeast, Hx, Hy, Nx, Ny)
+    _fill_northwest_send_buffer!(parent(c), buff.northwest, Hx, Hy, Nx, Ny)
+    _fill_northeast_send_buffer!(parent(c), buff.northeast, Hx, Hy, Nx, Ny)
 
     return nothing
 end
 
+fill_send_buffers!(c::OffsetArray, ::Nothing, grid) = nothing
 fill_send_buffers!(c::OffsetArray, ::Nothing, grid, ::Val{:corners}) = nothing
 
 function fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::Val{:corners})
     Hx, Hy, _ = halo_size(grid)
     Nx, Ny, _ = size(grid)
 
-    _fill_southwest_send_buffer!(parent(c), buff, buff.southwest, Hx, Hy, Nx, Ny)
-    _fill_southeast_send_buffer!(parent(c), buff, buff.southeast, Hx, Hy, Nx, Ny)
-    _fill_northwest_send_buffer!(parent(c), buff, buff.northwest, Hx, Hy, Nx, Ny)
-    _fill_northeast_send_buffer!(parent(c), buff, buff.northeast, Hx, Hy, Nx, Ny)
+    _fill_southwest_send_buffer!(parent(c), buff.southwest, Hx, Hy, Nx, Ny)
+    _fill_southeast_send_buffer!(parent(c), buff.southeast, Hx, Hy, Nx, Ny)
+    _fill_northwest_send_buffer!(parent(c), buff.northwest, Hx, Hy, Nx, Ny)
+    _fill_northeast_send_buffer!(parent(c), buff.northeast, Hx, Hy, Nx, Ny)
 
     return nothing
 end
@@ -152,14 +183,14 @@ end
 ##### Single sided fill_send_buffers!
 #####
 
-fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::West) =
-    _fill_west_send_buffer!(parent(c), buff, buff.west, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...)
-fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::East) =
-    _fill_east_send_buffer!(parent(c), buff, buff.east, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...)
-fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::South) =
-    _fill_south_send_buffer!(parent(c), buff, buff.south, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...)
-fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::North) =
-    _fill_north_send_buffer!(parent(c), buff, buff.north, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...)
+fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::West) = 
+    _fill_west_send_buffer!(parent(c), buff.west, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...)
+fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::East) = 
+    _fill_east_send_buffer!(parent(c), buff.east, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...)
+fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::South) = 
+    _fill_south_send_buffer!(parent(c), buff.south, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...)
+fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::North) = 
+    _fill_north_send_buffer!(parent(c), buff.north, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...)
 fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::Bottom) = nothing
 fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::Top) = nothing
 
@@ -171,16 +202,16 @@ function fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::
     Hx, Hy, _ = halo_size(grid)
     Nx, Ny, _ = size(grid)
 
-     _fill_west_send_buffer!(parent(c), buff, buff.west,  Hx, Hy, Nx, Ny)
-     _fill_east_send_buffer!(parent(c), buff, buff.east,  Hx, Hy, Nx, Ny)
+     _fill_west_send_buffer!(parent(c), buff.west,  Hx, Hy, Nx, Ny)
+     _fill_east_send_buffer!(parent(c), buff.east,  Hx, Hy, Nx, Ny)
 end
 
 function fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::SouthAndNorth)
     Hx, Hy, _ = halo_size(grid)
     Nx, Ny, _ = size(grid)
 
-    _fill_south_send_buffer!(parent(c), buff, buff.south, Hx, Hy, Nx, Ny)
-    _fill_north_send_buffer!(parent(c), buff, buff.north, Hx, Hy, Nx, Ny)
+    _fill_south_send_buffer!(parent(c), buff.south, Hx, Hy, Nx, Ny)
+    _fill_north_send_buffer!(parent(c), buff.north, Hx, Hy, Nx, Ny)
 end
 
 fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::BottomAndTop) = nothing
@@ -194,15 +225,15 @@ function recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid)
     Hx, Hy, _ = halo_size(grid)
     Nx, Ny, _ = size(grid)
 
-     _recv_from_west_buffer!(parent(c), buff, buff.west,  Hx, Hy, Nx, Ny)
-     _recv_from_east_buffer!(parent(c), buff, buff.east,  Hx, Hy, Nx, Ny)
-    _recv_from_south_buffer!(parent(c), buff, buff.south, Hx, Hy, Nx, Ny)
-    _recv_from_north_buffer!(parent(c), buff, buff.north, Hx, Hy, Nx, Ny)
+     _recv_from_west_buffer!(parent(c), buff.west,  Hx, Hy, Nx, Ny)
+     _recv_from_east_buffer!(parent(c), buff.east,  Hx, Hy, Nx, Ny)
+    _recv_from_south_buffer!(parent(c), buff.south, Hx, Hy, Nx, Ny)
+    _recv_from_north_buffer!(parent(c), buff.north, Hx, Hy, Nx, Ny)
 
-   _recv_from_southwest_buffer!(parent(c), buff, buff.southwest, Hx, Hy, Nx, Ny)
-   _recv_from_southeast_buffer!(parent(c), buff, buff.southeast, Hx, Hy, Nx, Ny)
-   _recv_from_northwest_buffer!(parent(c), buff, buff.northwest, Hx, Hy, Nx, Ny)
-   _recv_from_northeast_buffer!(parent(c), buff, buff.northeast, Hx, Hy, Nx, Ny)
+   _recv_from_southwest_buffer!(parent(c), buff.southwest, Hx, Hy, Nx, Ny)
+   _recv_from_southeast_buffer!(parent(c), buff.southeast, Hx, Hy, Nx, Ny)
+   _recv_from_northwest_buffer!(parent(c), buff.northwest, Hx, Hy, Nx, Ny)
+   _recv_from_northeast_buffer!(parent(c), buff.northeast, Hx, Hy, Nx, Ny)
 
    return nothing
 end
@@ -211,10 +242,10 @@ function recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::
     Hx, Hy, _ = halo_size(grid)
     Nx, Ny, _ = size(grid)
 
-   _recv_from_southwest_buffer!(parent(c), buff, buff.southwest, Hx, Hy, Nx, Ny)
-   _recv_from_southeast_buffer!(parent(c), buff, buff.southeast, Hx, Hy, Nx, Ny)
-   _recv_from_northwest_buffer!(parent(c), buff, buff.northwest, Hx, Hy, Nx, Ny)
-   _recv_from_northeast_buffer!(parent(c), buff, buff.northeast, Hx, Hy, Nx, Ny)
+   _recv_from_southwest_buffer!(parent(c), buff.southwest, Hx, Hy, Nx, Ny)
+   _recv_from_southeast_buffer!(parent(c), buff.southeast, Hx, Hy, Nx, Ny)
+   _recv_from_northwest_buffer!(parent(c), buff.northwest, Hx, Hy, Nx, Ny)
+   _recv_from_northeast_buffer!(parent(c), buff.northeast, Hx, Hy, Nx, Ny)
 
    return nothing
 end
@@ -223,14 +254,14 @@ end
 ##### Single sided recv_from_buffers!
 #####
 
-recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::West) =
-    _recv_from_west_buffer!(parent(c), buff, buff.west, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...)
-recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::East) =
-    _recv_from_east_buffer!(parent(c), buff, buff.east, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...)
-recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::South) =
-    _recv_from_south_buffer!(parent(c), buff, buff.south, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...)
-recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::North) =
-    _recv_from_north_buffer!(parent(c), buff, buff.north, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...)
+recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::West) = 
+    _recv_from_west_buffer!(parent(c), buff.west, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...)
+recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::East) = 
+    _recv_from_east_buffer!(parent(c), buff.east, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...)
+recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::South) = 
+    _recv_from_south_buffer!(parent(c), buff.south, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...)
+recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::North) = 
+    _recv_from_north_buffer!(parent(c), buff.north, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...)
 recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::Bottom) = nothing
 recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::Top) = nothing
 
@@ -242,8 +273,8 @@ function recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::
     Hx, Hy, _ = halo_size(grid)
     Nx, Ny, _ = size(grid)
 
-    _recv_from_west_buffer!(parent(c), buff, buff.west, Hx, Hy, Nx, Ny)
-    _recv_from_east_buffer!(parent(c), buff, buff.east, Hx, Hy, Nx, Ny)
+    _recv_from_west_buffer!(parent(c), buff.west, Hx, Hy, Nx, Ny)
+    _recv_from_east_buffer!(parent(c), buff.east, Hx, Hy, Nx, Ny)
 
     return nothing
 end
@@ -252,8 +283,8 @@ function recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::
     Hx, Hy, _ = halo_size(grid)
     Nx, Ny, _ = size(grid)
 
-   _recv_from_south_buffer!(parent(c), buff, buff.south, Hx, Hy, Nx, Ny)
-   _recv_from_north_buffer!(parent(c), buff, buff.north, Hx, Hy, Nx, Ny)
+   _recv_from_south_buffer!(parent(c), buff.south, Hx, Hy, Nx, Ny)
+   _recv_from_north_buffer!(parent(c), buff.north, Hx, Hy, Nx, Ny)
 
    return nothing
 end
@@ -267,48 +298,45 @@ recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::BottomAnd
 for dir in (:west, :east, :south, :north, :southwest, :southeast, :northwest, :northeast)
     _fill_send_buffer! = Symbol(:_fill_, dir, :_send_buffer!)
     _recv_from_buffer! = Symbol(:_recv_from_, dir, :_buffer!)
-
-    @eval $_fill_send_buffer!(c, b, ::Nothing, args...) = nothing
-    @eval $_recv_from_buffer!(c, b, ::Nothing, args...) = nothing
-    @eval $_fill_send_buffer!(c, ::OneDBuffers, ::Nothing, args...) = nothing
-    @eval $_recv_from_buffer!(c, ::OneDBuffers, ::Nothing, args...) = nothing
+    @eval $_fill_send_buffer!(c, ::Nothing, args...) = nothing
+    @eval $_recv_from_buffer!(c, ::Nothing, args...) = nothing
 end
 
 #####
 ##### 1D Parallelizations (cover corners with 1 MPI pass)
 #####
 
- _fill_west_send_buffer!(c, ::OneDBuffers, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Hx:2Hx,   :, :)
- _fill_east_send_buffer!(c, ::OneDBuffers, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Nx:Nx+Hx, :, :)
-_fill_south_send_buffer!(c, ::OneDBuffers, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, :, 1+Hy:2Hy,  :)
-_fill_north_send_buffer!(c, ::OneDBuffers, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, :, 1+Ny:Ny+Hy, :)
+ _fill_west_send_buffer!(c, buff::OneDBuffer, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Hx:2Hx,   :, :)
+ _fill_east_send_buffer!(c, buff::OneDBuffer, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Nx:Nx+Hx, :, :)
+_fill_south_send_buffer!(c, buff::OneDBuffer, Hx, Hy, Nx, Ny) = buff.send .= view(c, :, 1+Hy:2Hy,  :)
+_fill_north_send_buffer!(c, buff::OneDBuffer, Hx, Hy, Nx, Ny) = buff.send .= view(c, :, 1+Ny:Ny+Hy, :)
 
- _recv_from_west_buffer!(c, ::OneDBuffers, buff, Hx, Hy, Nx, Ny) = view(c, 1:Hx,           :,     :) .= buff.recv
- _recv_from_east_buffer!(c, ::OneDBuffers, buff, Hx, Hy, Nx, Ny) = view(c, 1+Nx+Hx:Nx+2Hx, :,     :) .= buff.recv
-_recv_from_south_buffer!(c, ::OneDBuffers, buff, Hx, Hy, Nx, Ny) = view(c, :,     1:Hy,           :) .= buff.recv
-_recv_from_north_buffer!(c, ::OneDBuffers, buff, Hx, Hy, Nx, Ny) = view(c, :,     1+Ny+Hy:Ny+2Hy, :) .= buff.recv
+ _recv_from_west_buffer!(c, buff::OneDBuffer, Hx, Hy, Nx, Ny) = view(c, 1:Hx,           :,     :) .= buff.recv
+ _recv_from_east_buffer!(c, buff::OneDBuffer, Hx, Hy, Nx, Ny) = view(c, 1+Nx+Hx:Nx+2Hx, :,     :) .= buff.recv
+_recv_from_south_buffer!(c, buff::OneDBuffer, Hx, Hy, Nx, Ny) = view(c, :,     1:Hy,           :) .= buff.recv
+_recv_from_north_buffer!(c, buff::OneDBuffer, Hx, Hy, Nx, Ny) = view(c, :,     1+Ny+Hy:Ny+2Hy, :) .= buff.recv
 
 #####
-##### 2D Parallelizations (explicitly send corners)
+##### 2D Parallelizations (explicitly send all corners)
 #####
 
- _fill_west_send_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Hx:2Hx,   1+Hy:Ny+Hy, :)
- _fill_east_send_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Nx:Nx+Hx, 1+Hy:Ny+Hy, :)
-_fill_south_send_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Hx:Nx+Hx, 1+Hy:2Hy,  :)
-_fill_north_send_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Hx:Nx+Hx, 1+Ny:Ny+Hy, :)
-
- _recv_from_west_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = view(c, 1:Hx,           1+Hy:Ny+Hy,     :) .= buff.recv
- _recv_from_east_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = view(c, 1+Nx+Hx:Nx+2Hx, 1+Hy:Ny+Hy,     :) .= buff.recv
-_recv_from_south_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = view(c, 1+Hx:Nx+Hx,     1:Hy,           :) .= buff.recv
-_recv_from_north_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = view(c, 1+Hx:Nx+Hx,     1+Ny+Hy:Ny+2Hy, :) .= buff.recv
-
-_fill_southwest_send_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Hx:2Hx,   1+Hy:2Hy,   :)
-_fill_southeast_send_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Nx:Nx+Hx, 1+Hy:2Hy,   :)
-_fill_northwest_send_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Hx:2Hx,   1+Ny:Ny+Hy, :)
-_fill_northeast_send_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Nx:Nx+Hx, 1+Ny:Ny+Hy, :)
-
-_recv_from_southwest_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = view(c, 1:Hx,           1:Hy,           :) .= buff.recv
-_recv_from_southeast_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = view(c, 1+Nx+Hx:Nx+2Hx, 1:Hy,           :) .= buff.recv
-_recv_from_northwest_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = view(c, 1:Hx,           1+Ny+Hy:Ny+2Hy, :) .= buff.recv
-_recv_from_northeast_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = view(c, 1+Nx+Hx:Nx+2Hx, 1+Ny+Hy:Ny+2Hy, :) .= buff.recv
+ _fill_west_send_buffer!(c, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Hx:2Hx,   1+Hy:Ny+Hy, :)
+ _fill_east_send_buffer!(c, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Nx:Nx+Hx, 1+Hy:Ny+Hy, :)
+_fill_south_send_buffer!(c, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Hx:Nx+Hx, 1+Hy:2Hy,  :)
+_fill_north_send_buffer!(c, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Hx:Nx+Hx, 1+Ny:Ny+Hy, :)
+
+ _recv_from_west_buffer!(c, buff, Hx, Hy, Nx, Ny) = view(c, 1:Hx,           1+Hy:Ny+Hy,     :) .= buff.recv
+ _recv_from_east_buffer!(c, buff, Hx, Hy, Nx, Ny) = view(c, 1+Nx+Hx:Nx+2Hx, 1+Hy:Ny+Hy,     :) .= buff.recv
+_recv_from_south_buffer!(c, buff, Hx, Hy, Nx, Ny) = view(c, 1+Hx:Nx+Hx,     1:Hy,           :) .= buff.recv
+_recv_from_north_buffer!(c, buff, Hx, Hy, Nx, Ny) = view(c, 1+Hx:Nx+Hx,     1+Ny+Hy:Ny+2Hy, :) .= buff.recv
+
+_fill_southwest_send_buffer!(c, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Hx:2Hx,   1+Hy:2Hy,   :)
+_fill_southeast_send_buffer!(c, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Nx:Nx+Hx, 1+Hy:2Hy,   :)
+_fill_northwest_send_buffer!(c, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Hx:2Hx,   1+Ny:Ny+Hy, :)
+_fill_northeast_send_buffer!(c, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Nx:Nx+Hx, 1+Ny:Ny+Hy, :)
+
+_recv_from_southwest_buffer!(c, buff, Hx, Hy, Nx, Ny) = view(c, 1:Hx,           1:Hy,           :) .= buff.recv
+_recv_from_southeast_buffer!(c, buff, Hx, Hy, Nx, Ny) = view(c, 1+Nx+Hx:Nx+2Hx, 1:Hy,           :) .= buff.recv
+_recv_from_northwest_buffer!(c, buff, Hx, Hy, Nx, Ny) = view(c, 1:Hx,           1+Ny+Hy:Ny+2Hy, :) .= buff.recv
+_recv_from_northeast_buffer!(c, buff, Hx, Hy, Nx, Ny) = view(c, 1+Nx+Hx:Nx+2Hx, 1+Ny+Hy:Ny+2Hy, :) .= buff.recv
 
diff --git a/src/DistributedComputations/distributed_fields.jl b/src/DistributedComputations/distributed_fields.jl
index 2545920f7b..f9357b40c2 100644
--- a/src/DistributedComputations/distributed_fields.jl
+++ b/src/DistributedComputations/distributed_fields.jl
@@ -62,13 +62,23 @@ function set!(u::DistributedField, V::Field)
     end
 end
 
+# Fallback -> not implemented
+synchronize_communication!(var) = throw(ArgumentError("`synchronize_communication!` not implemented for variables of type $(typeof(var))"))
+
+# Methods for types that do not require synchronization
+synchronize_communication!(::AbstractField) = nothing
+synchronize_communication!(::AbstractArray) = nothing   
+synchronize_communication!(::Number)        = nothing
+
+# Distribute synchronize_communication! over tuples and named tuples
+synchronize_communication!(t::Union{NamedTuple, Tuple}) = foreach(synchronize_communication!, t)
 
 """
     synchronize_communication!(field)
 
 complete the halo passing of `field` among processors.
 """
-function synchronize_communication!(field)
+function synchronize_communication!(field::DistributedField)
     arch = architecture(field.grid)
 
     # Wait for outstanding requests
diff --git a/src/DistributedComputations/distributed_grids.jl b/src/DistributedComputations/distributed_grids.jl
index 952dc9a0cb..9b3a82ca91 100644
--- a/src/DistributedComputations/distributed_grids.jl
+++ b/src/DistributedComputations/distributed_grids.jl
@@ -320,12 +320,14 @@ end
 function scatter_local_grids(global_grid::RectilinearGrid, arch::Distributed, local_size)
     x, y, z, topo, halo = scatter_grid_properties(global_grid)
     global_sz = global_size(arch, local_size)
+    global_sz = pop_flat_elements(global_sz, topo)
     return RectilinearGrid(arch, eltype(global_grid); size=global_sz, x=x, y=y, z=z, halo=halo, topology=topo)
 end
 
 function scatter_local_grids(global_grid::LatitudeLongitudeGrid, arch::Distributed, local_size)
     x, y, z, topo, halo = scatter_grid_properties(global_grid)
     global_sz = global_size(arch, local_size)
+    global_sz = pop_flat_elements(global_sz, topo)
     return LatitudeLongitudeGrid(arch, eltype(global_grid); size=global_sz, longitude=x,
                                  latitude=y, z=z, halo=halo, topology=topo, radius=global_grid.radius)
 end
diff --git a/src/DistributedComputations/halo_communication.jl b/src/DistributedComputations/halo_communication.jl
index 26bc7f641c..559882da67 100644
--- a/src/DistributedComputations/halo_communication.jl
+++ b/src/DistributedComputations/halo_communication.jl
@@ -84,8 +84,7 @@ fill_halo_regions!(field::DistributedField, args...; kwargs...) =
                        args...;
                        kwargs...)
 
-function fill_halo_regions!(c::OffsetArray, boundary_conditions, indices, loc, grid::DistributedGrid, buffers, args...;
-                            fill_open_bcs=true, kwargs...)
+function fill_halo_regions!(c::OffsetArray, boundary_conditions, indices, loc, grid::DistributedGrid, args...; kwargs...)
 
     arch = architecture(grid)
     kernels!, bcs = get_boundary_kernels(boundary_conditions, c, grid, loc, indices)
@@ -94,10 +93,10 @@ function fill_halo_regions!(c::OffsetArray, boundary_conditions, indices, loc, g
     outstanding_requests = length(arch.mpi_requests)
 
     for task = 1:number_of_tasks
-        fill_halo_event!(c, kernels![task], bcs[task], loc, grid, buffers, args...; kwargs...)
+        fill_halo_event!(c, kernels![task], bcs[task], loc, grid, args...; kwargs...)
     end
 
-    fill_corners!(c, arch.connectivity, indices, loc, arch, grid, buffers, args...; kwargs...)
+    fill_corners!(c, arch.connectivity, indices, loc, arch, grid, args...; kwargs...)
 
     # We increment the request counter only if we have actually initiated the MPI communication.
     # This is the case only if at least one of the boundary conditions is a distributed communication
diff --git a/src/ImmersedBoundaries/grid_fitted_bottom.jl b/src/ImmersedBoundaries/grid_fitted_bottom.jl
index 3367c3a9b0..3588146c0e 100644
--- a/src/ImmersedBoundaries/grid_fitted_bottom.jl
+++ b/src/ImmersedBoundaries/grid_fitted_bottom.jl
@@ -158,7 +158,6 @@ YFlatAGFIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Flat, <:Any, <:Any, <:Abstrac
 @inline static_column_depthᶠᶠᵃ(i, j, ibg::XFlatAGFIBG) = static_column_depthᶜᶠᵃ(i, j, ibg)
 @inline static_column_depthᶠᶠᵃ(i, j, ibg::YFlatAGFIBG) = static_column_depthᶠᶜᵃ(i, j, ibg)
 
-
 function constructor_arguments(grid::AGFBIBG)
     args, kwargs = constructor_arguments(grid.underlying_grid)
     args = merge(args, Dict(:bottom_height => grid.immersed_boundary.bottom_height,
diff --git a/src/ImmersedBoundaries/mutable_immersed_grid.jl b/src/ImmersedBoundaries/mutable_immersed_grid.jl
index ad8c56d91b..d0e00c006c 100644
--- a/src/ImmersedBoundaries/mutable_immersed_grid.jl
+++ b/src/ImmersedBoundaries/mutable_immersed_grid.jl
@@ -28,6 +28,16 @@ const MutableGridOfSomeKind = Union{MutableImmersedGrid, AbstractMutableGrid}
 @inline column_depthᶠᶜᵃ(i, j, grid::MutableGridOfSomeKind) = column_depthᶠᶜᵃ(i, j, 1, grid, grid.z.ηⁿ)
 @inline column_depthᶠᶠᵃ(i, j, grid::MutableGridOfSomeKind) = column_depthᶠᶠᵃ(i, j, 1, grid, grid.z.ηⁿ)
 
+@inline column_depthᶜᶜᵃ(i, j, k, grid) = static_column_depthᶜᶜᵃ(i, j, grid)
+@inline column_depthᶜᶠᵃ(i, j, k, grid) = static_column_depthᶜᶠᵃ(i, j, grid)
+@inline column_depthᶠᶜᵃ(i, j, k, grid) = static_column_depthᶠᶜᵃ(i, j, grid)
+@inline column_depthᶠᶠᵃ(i, j, k, grid) = static_column_depthᶠᶠᵃ(i, j, grid)
+
+@inline column_depthᶜᶜᵃ(i, j, k, grid::MutableGridOfSomeKind) = column_depthᶜᶜᵃ(i, j, 1, grid, grid.z.ηⁿ)
+@inline column_depthᶜᶠᵃ(i, j, k, grid::MutableGridOfSomeKind) = column_depthᶜᶠᵃ(i, j, 1, grid, grid.z.ηⁿ)
+@inline column_depthᶠᶜᵃ(i, j, k, grid::MutableGridOfSomeKind) = column_depthᶠᶜᵃ(i, j, 1, grid, grid.z.ηⁿ)
+@inline column_depthᶠᶠᵃ(i, j, k, grid::MutableGridOfSomeKind) = column_depthᶠᶠᵃ(i, j, 1, grid, grid.z.ηⁿ)
+
 # Fallbacks
 @inline σⁿ(i, j, k, ibg::IBG, ℓx, ℓy, ℓz) = σⁿ(i, j, k, ibg.underlying_grid, ℓx, ℓy, ℓz)
 @inline σ⁻(i, j, k, ibg::IBG, ℓx, ℓy, ℓz) = σ⁻(i, j, k, ibg.underlying_grid, ℓx, ℓy, ℓz)
diff --git a/src/Models/HydrostaticFreeSurfaceModels/HydrostaticFreeSurfaceModels.jl b/src/Models/HydrostaticFreeSurfaceModels/HydrostaticFreeSurfaceModels.jl
index 7ec0bc0fdc..642afe5912 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/HydrostaticFreeSurfaceModels.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/HydrostaticFreeSurfaceModels.jl
@@ -10,8 +10,9 @@ using KernelAbstractions.Extras.LoopInfo: @unroll
 using Adapt
 
 using Oceananigans.Utils
+using Oceananigans.Grids
 using Oceananigans.Utils: launch!
-using Oceananigans.Grids: AbstractGrid
+using Oceananigans.Grids: AbstractGrid, StaticVerticalDiscretization
 
 using DocStringExtensions
 
@@ -19,25 +20,21 @@ import Oceananigans: fields, prognostic_fields, initialize!
 import Oceananigans.Advection: cell_advection_timescale
 import Oceananigans.TimeSteppers: step_lagrangian_particles!
 import Oceananigans.Architectures: on_architecture
+import Oceananigans.BoundaryConditions: fill_halo_regions!
 
-using Oceananigans.TimeSteppers: SplitRungeKutta3TimeStepper, QuasiAdamsBashforth2TimeStepper
+using Oceananigans.TimeSteppers: SplitRungeKuttaTimeStepper, QuasiAdamsBashforth2TimeStepper
+
+# The only grid type that can support an FFT implicit free-surface solver 
+const XYRegularStaticRG = RectilinearGrid{<:Any, <:Any, <:Any, <:Any, <:StaticVerticalDiscretization, <:Number, <:Number}
 
 abstract type AbstractFreeSurface{E, G} end
 
 struct ZCoordinate end
-
-struct ZStarCoordinate{CC}
-    storage :: CC # Storage space used in different ways by different timestepping schemes.
-end
-
-ZStarCoordinate(grid::AbstractGrid) = ZStarCoordinate(Field{Center, Center, Nothing}(grid))
+struct ZStarCoordinate end
 
 Base.summary(::ZStarCoordinate) = "ZStarCoordinate"
 Base.show(io::IO, c::ZStarCoordinate) = print(io, summary(c))
 
-Adapt.adapt_structure(to, coord::ZStarCoordinate) = ZStarCoordinate(Adapt.adapt(to, coord.storage))
-on_architecture(arch, coord::ZStarCoordinate) = ZStarCoordinate(on_architecture(arch, coord.storage))
-
 # This is only used by the cubed sphere for now.
 fill_horizontal_velocity_halos!(args...) = nothing
 
@@ -50,8 +47,31 @@ free_surface_displacement_field(velocities, ::Nothing, grid) = nothing
 
 # free surface initialization functions
 initialize_free_surface!(free_surface, grid, velocities) = nothing
+compute_transport_velocities!(model, free_surface) = nothing    
+
+# If U and V exist, we use them
+@inline barotropic_U(i, j, k, grid, U, u) = @inbounds U[i, j, k]
+@inline barotropic_V(i, j, k, grid, V, v) = @inbounds V[i, j, k]
+
+# If either U or V are not available, we compute them
+@inline function barotropic_U(i, j, k, grid, ::Nothing, u)
+    U = u[i, j, 1] * Δzᶠᶜᶜ(i, j, 1, grid)
+    for k′ in 2:size(grid, 3)
+        @inbounds U += u[i, j, k′] * Δzᶠᶜᶜ(i, j, k′, grid)
+    end
+    return U
+end
+
+@inline function barotropic_V(i, j, k, grid, ::Nothing, v)
+    V = v[i, j, 1] * Δzᶜᶠᶜ(i, j, 1, grid)
+    for k′ in 2:size(grid, 3)
+        @inbounds V += v[i, j, k′] * Δzᶜᶠᶜ(i, j, k′, grid)
+    end
+    return V
+end
 
 include("compute_w_from_continuity.jl")
+include("hydrostatic_free_surface_field_tuples.jl")
 
 # No free surface
 include("nothing_free_surface.jl")
@@ -59,17 +79,14 @@ include("nothing_free_surface.jl")
 # Explicit free-surface solver functionality
 include("explicit_free_surface.jl")
 
+# Split-Explicit free-surface solver functionality
+include("SplitExplicitFreeSurfaces/SplitExplicitFreeSurfaces.jl")
+using .SplitExplicitFreeSurfaces
+
 # Implicit free-surface solver functionality
-include("implicit_free_surface_utils.jl")
-include("compute_vertically_integrated_variables.jl")
 include("fft_based_implicit_free_surface_solver.jl")
 include("pcg_implicit_free_surface_solver.jl")
 include("implicit_free_surface.jl")
-include("hydrostatic_free_surface_field_tuples.jl")
-
-# Split-Explicit free-surface solver functionality
-include("SplitExplicitFreeSurfaces/SplitExplicitFreeSurfaces.jl")
-using .SplitExplicitFreeSurfaces
 
 # ZStarCoordinate implementation
 include("z_star_vertical_spacing.jl")
@@ -148,7 +165,7 @@ include("compute_hydrostatic_free_surface_buffers.jl")
 include("compute_hydrostatic_flux_bcs.jl")
 include("update_hydrostatic_free_surface_model_state.jl")
 include("hydrostatic_free_surface_ab2_step.jl")
-include("hydrostatic_free_surface_rk3_step.jl")
+include("hydrostatic_free_surface_rk_step.jl")
 include("cache_hydrostatic_free_surface_tendencies.jl")
 include("prescribed_hydrostatic_velocity_fields.jl")
 include("single_column_model_mode.jl")
diff --git a/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/SplitExplicitFreeSurfaces.jl b/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/SplitExplicitFreeSurfaces.jl
index abbd62bd8e..2396ba81e7 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/SplitExplicitFreeSurfaces.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/SplitExplicitFreeSurfaces.jl
@@ -15,7 +15,8 @@ using Oceananigans.ImmersedBoundaries
 using Oceananigans.Grids: AbstractGrid, topology
 using Oceananigans.ImmersedBoundaries: linear_index_to_tuple, mask_immersed_field!
 using Oceananigans.Models.HydrostaticFreeSurfaceModels: AbstractFreeSurface,
-                                                        free_surface_displacement_field
+                                                        free_surface_displacement_field,
+                                                        update_vertical_velocities!
 
 using Adapt
 using Base
@@ -31,6 +32,7 @@ import Oceananigans.Models.HydrostaticFreeSurfaceModels: initialize_free_surface
                                                          materialize_free_surface,
                                                          step_free_surface!,
                                                          compute_free_surface_tendency!,
+                                                         compute_transport_velocities!,
                                                          explicit_barotropic_pressure_x_gradient,
                                                          explicit_barotropic_pressure_y_gradient
 
diff --git a/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/barotropic_split_explicit_corrector.jl b/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/barotropic_split_explicit_corrector.jl
index 775f46d2a1..a5933f4be6 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/barotropic_split_explicit_corrector.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/barotropic_split_explicit_corrector.jl
@@ -27,9 +27,13 @@ function barotropic_split_explicit_corrector!(u, v, free_surface, grid)
     state = free_surface.filtered_state
     η     = free_surface.η
     U, V  = free_surface.barotropic_velocities
-    U̅, V̅  = state.U, state.V
+    U̅, V̅  = state.U̅, state.V̅
     arch  = architecture(grid)
 
+    # Preparing velocities for the barotropic correction
+    mask_immersed_field!(u)
+    mask_immersed_field!(v)
+
     # NOTE: the filtered `U̅` and `V̅` have been copied in the instantaneous `U` and `V`,
     # so we use the filtered velocities as "work arrays" to store the vertical integrals
     # of the instantaneous velocities `u` and `v`.
@@ -44,14 +48,47 @@ end
 
 @kernel function _barotropic_split_explicit_corrector!(u, v, U, V, U̅, V̅, grid)
     i, j, k = @index(Global, NTuple)
+    Hᶠᶜ = column_depthᶠᶜᵃ(i, j, grid)
+    Hᶜᶠ = column_depthᶜᶠᵃ(i, j, grid)
+
+    @inbounds u[i, j, k] = u[i, j, k] + (U[i, j, 1] - U̅[i, j, 1]) / Hᶠᶜ
+    @inbounds v[i, j, k] = v[i, j, k] + (V[i, j, 1] - V̅[i, j, 1]) / Hᶜᶠ
+end
+
+@kernel function _compute_transport_velocities!(ũ, ṽ, grid, Ũ, Ṽ, u, v, U̅, V̅)
+    i, j, k = @index(Global, NTuple)
+    Hᶠᶜ = column_depthᶠᶜᵃ(i, j, grid)
+    Hᶜᶠ = column_depthᶜᶠᵃ(i, j, grid)
+
+    immersedᶜᶠᶜ = peripheral_node(i, j, k, grid, Center(), Face(), Center())
+    immersedᶠᶜᶜ = peripheral_node(i, j, k, grid, Face(), Center(), Center())
 
     @inbounds begin
-        Hᶠᶜ = column_depthᶠᶜᵃ(i, j, grid)
-        Hᶜᶠ = column_depthᶜᶠᵃ(i, j, grid)
+        ũ⁺ = u[i, j, k] + (Ũ[i, j, 1] - U̅[i, j, 1]) / Hᶠᶜ
+        ṽ⁺ = v[i, j, k] + (Ṽ[i, j, 1] - V̅[i, j, 1]) / Hᶜᶠ
 
-        u[i, j, k] = u[i, j, k] + (U[i, j, 1] - U̅[i, j, 1]) / Hᶠᶜ
-        v[i, j, k] = v[i, j, k] + (V[i, j, 1] - V̅[i, j, 1]) / Hᶜᶠ
+        ũ[i, j, k] = ifelse(immersedᶠᶜᶜ, zero(grid), ũ⁺)
+        ṽ[i, j, k] = ifelse(immersedᶜᶠᶜ, zero(grid), ṽ⁺)
     end
 end
 
+function compute_transport_velocities!(model, free_surface::SplitExplicitFreeSurface)
+    grid = model.grid
+    u, v, _ = model.velocities
+    ũ, ṽ, _ = model.transport_velocities
+    Ũ = free_surface.filtered_state.Ũ
+    Ṽ = free_surface.filtered_state.Ṽ
+    U̅ = free_surface.filtered_state.U̅
+    V̅ = free_surface.filtered_state.V̅
 
+    compute_barotropic_mode!(U̅, V̅, grid, u, v)
+    launch!(architecture(grid), grid, :xyz, _compute_transport_velocities!, ũ, ṽ, grid, Ũ, Ṽ, u, v, U̅, V̅)
+
+    # Fill transport velocities 
+    fill_halo_regions!((ũ, ṽ); async=true)
+    
+    # Update grid velocity and vertical transport velocity
+    update_vertical_velocities!(model.transport_velocities, model.grid, model)
+
+    return nothing
+end
diff --git a/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/compute_slow_tendencies.jl b/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/compute_slow_tendencies.jl
index acf2dd0ebc..bec5eb1593 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/compute_slow_tendencies.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/compute_slow_tendencies.jl
@@ -67,14 +67,9 @@ end
     @inbounds GVⁿ[i, j, 1] = G_vertical_integral(i, j, grid, Gvⁿ, Center(), Face(), Center())
 end
 
-@inline function compute_split_explicit_forcing!(GUⁿ, GVⁿ, grid, Guⁿ, Gvⁿ, timestepper::SplitRungeKutta3TimeStepper)
-
-    active_cells_map = get_active_column_map(grid)    
+@inline compute_split_explicit_forcing!(GUⁿ, GVⁿ, grid, Guⁿ, Gvⁿ, ::SplitRungeKuttaTimeStepper) = 
     launch!(architecture(grid), grid, :xy, _compute_integrated_rk3_tendencies!, 
-            GUⁿ, GVⁿ, grid, Guⁿ, Gvⁿ; active_cells_map)
-
-    return nothing
-end
+            GUⁿ, GVⁿ, grid, Guⁿ, Gvⁿ; active_cells_map = get_active_column_map(grid))
 
 #####
 ##### Free surface setup
@@ -93,9 +88,7 @@ function compute_free_surface_tendency!(grid, model, ::SplitExplicitFreeSurface)
     baroclinic_timestepper = model.timestepper
 
     @apply_regionally compute_split_explicit_forcing!(GUⁿ, GVⁿ, grid, Guⁿ, Gvⁿ, baroclinic_timestepper)
-    
-    fields_to_fill = (GUⁿ, GVⁿ)
-    fill_halo_regions!(fields_to_fill; async=true)
+    fill_halo_regions!((GUⁿ, GVⁿ); async=true) 
 
     return nothing
 end
diff --git a/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/distributed_split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/distributed_split_explicit_free_surface.jl
index 3c4080a74c..6e1ec91285 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/distributed_split_explicit_free_surface.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/distributed_split_explicit_free_surface.jl
@@ -1,6 +1,8 @@
 using Oceananigans.DistributedComputations: DistributedField
 using Oceananigans.DistributedComputations: AsynchronousDistributed, synchronize_communication!
 
+import Oceananigans.DistributedComputations: synchronize_communication!
+
 const DistributedSplitExplicit = SplitExplicitFreeSurface{<:DistributedField}
 
 wait_free_surface_communication!(free_surface, model, arch) = nothing
@@ -22,3 +24,15 @@ function wait_free_surface_communication!(free_surface::DistributedSplitExplicit
 
     return nothing
 end
+
+function synchronize_communication!(free_surface::SplitExplicitFreeSurface)
+    η    = free_surface.η
+    U, V = free_surface.barotropic_velocities
+    Ũ, Ṽ = free_surface.filtered_state.Ũ, free_surface.filtered_state.Ṽ
+
+    for field in (U, V, Ũ, Ṽ, η)
+        synchronize_communication!(field)
+    end
+    
+    return nothing
+end
diff --git a/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/initialize_split_explicit_substepping.jl b/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/initialize_split_explicit_substepping.jl
index 0ba88f6a24..9ae346018a 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/initialize_split_explicit_substepping.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/initialize_split_explicit_substepping.jl
@@ -1,5 +1,5 @@
 using Oceananigans.ImmersedBoundaries: get_active_column_map, peripheral_node
-using Oceananigans.TimeSteppers: QuasiAdamsBashforth2TimeStepper, SplitRungeKutta3TimeStepper
+using Oceananigans.TimeSteppers: QuasiAdamsBashforth2TimeStepper, SplitRungeKuttaTimeStepper
 using Oceananigans.Operators: Δz
 
 # This file contains two different initializations methods performed at different stages of the simulation.
@@ -34,15 +34,15 @@ function initialize_free_surface_state!(free_surface, baroclinic_timestepper, ti
 
     initialize_free_surface_timestepper!(timestepper, η, U, V)
 
-    fill!(free_surface.filtered_state.η, 0)
-    fill!(free_surface.filtered_state.U, 0)
-    fill!(free_surface.filtered_state.V, 0)
-
+    for field in free_surface.filtered_state
+        fill!(field, 0)
+    end
+       
     return nothing
 end
 
 # At the last stage we reset the velocities and perform the complete substepping from n to n+1
-function initialize_free_surface_state!(free_surface, baroclinic_ts::SplitRungeKutta3TimeStepper, barotropic_ts)
+function initialize_free_surface_state!(free_surface, baroclinic_ts::SplitRungeKuttaTimeStepper, barotropic_ts)
 
     η = free_surface.η
     U, V = free_surface.barotropic_velocities
@@ -58,9 +58,9 @@ function initialize_free_surface_state!(free_surface, baroclinic_ts::SplitRungeK
 
     initialize_free_surface_timestepper!(barotropic_ts, η, U, V)
 
-    fill!(free_surface.filtered_state.η, 0)
-    fill!(free_surface.filtered_state.U, 0)
-    fill!(free_surface.filtered_state.V, 0)
-
+    for field in free_surface.filtered_state
+        fill!(field, 0)
+    end
+       
     return nothing
 end
diff --git a/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/split_explicit_free_surface.jl
index a98d509b47..e5d4512ebe 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/split_explicit_free_surface.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/split_explicit_free_surface.jl
@@ -120,8 +120,8 @@ end
 # Simplest case: we have the substeps and the averaging kernel
 function split_explicit_substepping(::Nothing, substeps, fixed_Δt, grid, averaging_kernel, gravitational_acceleration)
     FT = eltype(gravitational_acceleration)
-    fractional_step_size, averaging_weights = weights_from_substeps(FT, substeps, averaging_kernel)
-    return FixedSubstepNumber(fractional_step_size, averaging_weights)
+    fractional_step_size, averaging_weights, transport_weights = weights_from_substeps(FT, substeps, averaging_kernel)
+    return FixedSubstepNumber(fractional_step_size, averaging_weights, transport_weights)
 end
 
 # The substeps are calculated dynamically when a cfl without a fixed_Δt is provided
@@ -202,11 +202,12 @@ function materialize_free_surface(free_surface::SplitExplicitFreeSurface, veloci
 
     U = Field{Face, Center, Nothing}(maybe_extended_grid, boundary_conditions = u_bcs)
     V = Field{Center, Face, Nothing}(maybe_extended_grid, boundary_conditions = v_bcs)
-
     U̅ = Field{Face, Center, Nothing}(maybe_extended_grid, boundary_conditions = u_bcs)
     V̅ = Field{Center, Face, Nothing}(maybe_extended_grid, boundary_conditions = v_bcs)
+    Ũ = Field{Face, Center, Nothing}(maybe_extended_grid, boundary_conditions = u_bcs)
+    Ṽ = Field{Center, Face, Nothing}(maybe_extended_grid, boundary_conditions = v_bcs)
 
-    filtered_state = (η = η̅, U = U̅, V = V̅)
+    filtered_state = (η̅ = η̅, U̅ = U̅, V̅ = V̅, Ũ = Ũ, Ṽ = Ṽ)
     barotropic_velocities = (U = U, V = V)
 
     kernel_parameters = maybe_augmented_kernel_parameters(TX, TY, substepping, maybe_extended_grid)
@@ -226,7 +227,6 @@ end
 # (p = 2, q = 4, r = 0.18927) minimize dispersion error from Shchepetkin and McWilliams (2005): https://doi.org/10.1016/j.ocemod.2004.08.002
 @inline function averaging_shape_function(τ::FT; p = 2, q = 4, r = FT(0.18927)) where FT
     τ₀ = (p + 2) * (p + q + 2) / (p + 1) / (p + q + 1)
-
     return (τ / τ₀)^p * (1 - (τ / τ₀)^q) - r * (τ / τ₀)
 end
 
@@ -245,6 +245,7 @@ a fixed number of substeps with time step size of `fractional_step_size * Δt_ba
 struct FixedSubstepNumber{B, F}
     fractional_step_size :: B
     averaging_weights    :: F
+    transport_weights    :: F
 end
 
 function FixedTimeStepSize(grid;
@@ -266,18 +267,26 @@ function FixedTimeStepSize(grid;
 end
 
 @inline function weights_from_substeps(FT, substeps, averaging_kernel)
-
+    M  = substeps ÷ 2
     τᶠ = range(FT(0), FT(2), length = substeps+1)
     Δτ = τᶠ[2] - τᶠ[1]
 
     averaging_weights = map(averaging_kernel, τᶠ[2:end])
-    idx = searchsortedlast(averaging_weights, 0, rev=true)
-    substeps = idx
+    M★ = substeps
+
+    # Find the latest allowable weight
+    for i in substeps:-1:1
+        if averaging_weights[i] > 0
+            M★ = i
+            break
+        end
+    end
 
-    averaging_weights = averaging_weights[1:idx]
+    averaging_weights = averaging_weights[1:M★]
     averaging_weights ./= sum(averaging_weights)
+    transport_weights = [sum(averaging_weights[i:M★]) for i in 1:M★] ./ M
 
-    return Δτ, map(FT, tuple(averaging_weights...))
+    return FT(Δτ), map(FT, tuple(averaging_weights...)), map(FT, tuple(transport_weights...))
 end
 
 Base.summary(s::FixedTimeStepSize)  = string("FixedTimeStepSize($(prettytime(s.Δt_barotropic)))")
@@ -298,7 +307,7 @@ function maybe_extend_halos(TX, TY, grid, substepping::FixedSubstepNumber)
 
     old_halos = halo_size(grid)
     Nsubsteps = length(substepping.averaging_weights)
-    step_halo = Nsubsteps + 1
+    step_halo = Nsubsteps+2
 
     Hx = TX() isa ConnectedTopology ? max(step_halo, old_halos[1]) : old_halos[1]
     Hy = TY() isa ConnectedTopology ? max(step_halo, old_halos[2]) : old_halos[2]
diff --git a/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/split_explicit_timesteppers.jl b/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/split_explicit_timesteppers.jl
index 09dd453f12..c6a6939e99 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/split_explicit_timesteppers.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/split_explicit_timesteppers.jl
@@ -141,12 +141,14 @@ end
 @inline η★(i, j, k, grid,  ::ForwardBackwardScheme, ηᵐ⁺¹) = @inbounds ηᵐ⁺¹[i, j, k]
 @inline η★(i, j, k, grid, t::AdamsBashforth3Scheme, ηᵐ⁺¹) = @inbounds t.δ * ηᵐ⁺¹[i, j, k] + t.μ * t.ηᵐ[i, j, k] + t.γ * t.ηᵐ⁻¹[i, j, k] + t.ϵ * t.ηᵐ⁻²[i, j, k]
 
-@inline cache_previous_velocities!(::ForwardBackwardScheme,   i, j, k, U) = nothing
-@inline cache_previous_free_surface!(::ForwardBackwardScheme, i, j, k, η) = nothing
+@inline   cache_previous_velocities!(::ForwardBackwardScheme, i, j, k, U, V) = nothing
+@inline cache_previous_free_surface!(::ForwardBackwardScheme, i, j, k, η)    = nothing
 
-@inline function cache_previous_velocities!(t::AdamsBashforth3Scheme, i, j, k, U)
+@inline function cache_previous_velocities!(t::AdamsBashforth3Scheme, i, j, k, U, V)
     @inbounds t.Uᵐ⁻²[i, j, k] = t.Uᵐ⁻¹[i, j, k]
     @inbounds t.Uᵐ⁻¹[i, j, k] =      U[i, j, k]
+    @inbounds t.Vᵐ⁻²[i, j, k] = t.Vᵐ⁻¹[i, j, k]
+    @inbounds t.Vᵐ⁻¹[i, j, k] =      V[i, j, k]
 
     return nothing
 end
diff --git a/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/step_split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/step_split_explicit_free_surface.jl
index 2a30a45e84..073b7585aa 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/step_split_explicit_free_surface.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/step_split_explicit_free_surface.jl
@@ -1,4 +1,6 @@
 using Oceananigans.ImmersedBoundaries: MutableGridOfSomeKind
+using Oceananigans.Fields: instantiated_location
+using ..HydrostaticFreeSurfaceModels: barotropic_U, barotropic_V
 
 # Evolution Kernels
 #
@@ -7,43 +9,42 @@ using Oceananigans.ImmersedBoundaries: MutableGridOfSomeKind
 #
 # the free surface field η and its average η̄ are located on `Face`s at the surface (grid.Nz +1). All other intermediate variables
 # (U, V, Ū, V̄) are barotropic fields (`ReducedField`) for which a k index is not defined
-
-@kernel function _split_explicit_free_surface!(grid, Δτ, η, U, V, timestepper)
+@kernel function _split_explicit_barotropic_velocity!(transport_weight, grid, Δτ, η, U, V, Gᵁ, Gⱽ, g, Ũ, Ṽ, timestepper)
     i, j = @index(Global, NTuple)
     k_top = grid.Nz+1
 
-    cache_previous_free_surface!(timestepper, i, j, k_top, η)
-    @inbounds  η[i, j, k_top] -= Δτ * (δxTᶜᵃᵃ(i, j, grid.Nz, grid, Δy_qᶠᶜᶠ, U★, timestepper, U) +
-                                       δyTᵃᶜᵃ(i, j, grid.Nz, grid, Δx_qᶜᶠᶠ, U★, timestepper, V)) * Az⁻¹ᶜᶜᶠ(i, j, k_top, grid)
+    cache_previous_velocities!(timestepper, i, j, 1, U, V)
+
+    Hᶠᶜ = column_depthᶠᶜᵃ(i, j, k_top, grid, η)
+    Hᶜᶠ = column_depthᶜᶠᵃ(i, j, k_top, grid, η)
+    
+    # ∂τ(U) = - ∇η + G
+    @inbounds begin
+        U[i, j, 1] += Δτ * (- g * Hᶠᶜ * ∂xTᶠᶜᶠ(i, j, k_top, grid, η★, timestepper, η) + Gᵁ[i, j, 1])
+        V[i, j, 1] += Δτ * (- g * Hᶜᶠ * ∂yTᶜᶠᶠ(i, j, k_top, grid, η★, timestepper, η) + Gⱽ[i, j, 1])
+        
+        # averaging the transport
+        Ũ[i, j, 1] += transport_weight * U[i, j, 1]
+        Ṽ[i, j, 1] += transport_weight * V[i, j, 1]
+    end
 end
 
-@kernel function _split_explicit_barotropic_velocity!(averaging_weight, grid, Δτ,
-                                                      η, U, V,
-                                                      η̅, U̅, V̅,
-                                                      Gᵁ, Gⱽ, g,
-                                                      timestepper)
+@kernel function _split_explicit_free_surface!(averaging_weight, grid, Δτ, η, U, V, F, clock, η̅, U̅, V̅, timestepper)
     i, j = @index(Global, NTuple)
     k_top = grid.Nz+1
 
-    cache_previous_velocities!(timestepper, i, j, 1, U)
-    cache_previous_velocities!(timestepper, i, j, 1, V)
+    cache_previous_free_surface!(timestepper, i, j, k_top, η)
 
-    Hᶠᶜ = column_depthᶠᶜᵃ(i, j, k_top, grid, η)
-    Hᶜᶠ = column_depthᶜᶠᵃ(i, j, k_top, grid, η)
+    δh_U = (δxTᶜᵃᵃ(i, j, grid.Nz, grid, Δy_qᶠᶜᶠ, U★, timestepper, U) +
+            δyTᵃᶜᵃ(i, j, grid.Nz, grid, Δx_qᶜᶠᶠ, U★, timestepper, V)) * Az⁻¹ᶜᶜᶠ(i, j, k_top, grid) 
 
     @inbounds begin
-        # ∂τ(U) = - ∇η + G
-        Uᵐ⁺¹ = U[i, j, 1] + Δτ * (- g * Hᶠᶜ * ∂xTᶠᶜᶠ(i, j, k_top, grid, η★, timestepper, η) + Gᵁ[i, j, 1])
-        Vᵐ⁺¹ = V[i, j, 1] + Δτ * (- g * Hᶜᶠ * ∂yTᶜᶠᶠ(i, j, k_top, grid, η★, timestepper, η) + Gⱽ[i, j, 1])
+        η[i, j, k_top] += Δτ * (F(i, j, k_top, grid, clock, (; η, U, V)) - δh_U)
 
         # time-averaging
         η̅[i, j, k_top] += averaging_weight * η[i, j, k_top]
-        U̅[i, j, 1]     += averaging_weight * Uᵐ⁺¹
-        V̅[i, j, 1]     += averaging_weight * Vᵐ⁺¹
-
-        # Updating the velocities
-        U[i, j, 1] = Uᵐ⁺¹
-        V[i, j, 1] = Vᵐ⁺¹
+        U̅[i, j, 1]     += averaging_weight * U[i, j, 1]
+        V̅[i, j, 1]     += averaging_weight * V[i, j, 1]
     end
 end
 
@@ -58,11 +59,10 @@ const MINIMUM_SUBSTEPS = 5
 @inline calculate_substeps(substepping::FNS, Δt=nothing) = length(substepping.averaging_weights)
 @inline calculate_substeps(substepping::FTS, Δt) = max(MINIMUM_SUBSTEPS, ceil(Int, 2 * Δt / substepping.Δt_barotropic))
 
-@inline calculate_adaptive_settings(substepping::FNS, substeps) = substepping.fractional_step_size, substepping.averaging_weights
-@inline calculate_adaptive_settings(substepping::FTS, substeps) = weights_from_substeps(eltype(substepping.Δt_barotropic),
-                                                                                        substeps, substepping.averaging_kernel)
+@inline calculate_adaptive_settings(substepping::FNS, substeps) = substepping.fractional_step_size, substepping.averaging_weights, substepping.transport_weights
+@inline calculate_adaptive_settings(substepping::FTS, substeps) = weights_from_substeps(eltype(substepping.Δt_barotropic), substeps, substepping.averaging_kernel)
 
-function iterate_split_explicit!(free_surface, grid, GUⁿ, GVⁿ, Δτᴮ, weights, ::Val{Nsubsteps}) where Nsubsteps
+function iterate_split_explicit!(free_surface, grid, GUⁿ, GVⁿ, Δτᴮ, F, clock, weights, transport_weights, ::Val{Nsubsteps}) where Nsubsteps
     arch = architecture(grid)
 
     η           = free_surface.η
@@ -74,17 +74,14 @@ function iterate_split_explicit!(free_surface, grid, GUⁿ, GVⁿ, Δτᴮ, weig
 
     # unpack state quantities, parameters and forcing terms
     U, V    = free_surface.barotropic_velocities
-    η̅, U̅, V̅ = state.η, state.U, state.V
+    η̅, U̅, V̅ = state.η̅, state.U̅, state.V̅
+    Ũ, Ṽ    = state.Ũ, state.Ṽ
 
-    free_surface_kernel!, _        = configure_kernel(arch, grid, parameters, _split_explicit_free_surface!, nothing, nothing)
-    barotropic_velocity_kernel!, _ = configure_kernel(arch, grid, parameters, _split_explicit_barotropic_velocity!, nothing, nothing)
+    barotropic_velocity_kernel!, _ = configure_kernel(arch, grid, parameters, _split_explicit_barotropic_velocity!)
+    free_surface_kernel!, _        = configure_kernel(arch, grid, parameters, _split_explicit_free_surface!)
 
-    η_args = (grid, Δτᴮ, η, U, V,
-              timestepper)
-
-    U_args = (grid, Δτᴮ, η, U, V,
-              η̅, U̅, V̅, GUⁿ, GVⁿ, g,
-              timestepper)
+    U_args = (grid, Δτᴮ, η, U, V, GUⁿ, GVⁿ, g, Ũ, Ṽ, timestepper)
+    η_args = (grid, Δτᴮ, η, U, V, F, clock, η̅, U̅, V̅, timestepper)
 
     GC.@preserve η_args U_args begin
 
@@ -95,30 +92,32 @@ function iterate_split_explicit!(free_surface, grid, GUⁿ, GVⁿ, Δτᴮ, weig
         converted_η_args = convert_to_device(arch, η_args)
         converted_U_args = convert_to_device(arch, U_args)
 
-        @unroll for substep in 1:Nsubsteps
-            Base.@_inline_meta
-            averaging_weight = weights[substep]
-            free_surface_kernel!(converted_η_args...)
-            barotropic_velocity_kernel!(averaging_weight, converted_U_args...)
+        for substep in 1:Nsubsteps
+            @inbounds averaging_weight = weights[substep]
+            @inbounds transport_weight = transport_weights[substep]
+            
+            # Advance barotropic velocities
+            barotropic_velocity_kernel!(transport_weight, converted_U_args...)
+            free_surface_kernel!(averaging_weight, converted_η_args...)
         end
     end
 
     return nothing
 end
 
-@kernel function _update_split_explicit_state!(η, U, V, grid, η̅, U̅, V̅)
+@kernel function _update_split_explicit_state!(η, U, V, grid, state)
     i, j = @index(Global, NTuple)
     k_top = grid.Nz+1
 
     @inbounds begin
-        η[i, j, k_top] = η̅[i, j, k_top]
-        U[i, j, 1]     = U̅[i, j, 1]
-        V[i, j, 1]     = V̅[i, j, 1]
+        η[i, j, k_top] = state.η̅[i, j, k_top]
+        U[i, j, 1]     = state.U̅[i, j, 1]
+        V[i, j, 1]     = state.V̅[i, j, 1]
     end
 end
 
 #####
-##### SplitExplicitFreeSurface barotropic subcylicing
+##### SplitExplicitFreeSurface barotropic subcycling
 #####
 
 function step_free_surface!(free_surface::SplitExplicitFreeSurface, model, baroclinic_timestepper, Δt)
@@ -131,22 +130,13 @@ function step_free_surface!(free_surface::SplitExplicitFreeSurface, model, baroc
 
     barotropic_velocities = free_surface.barotropic_velocities
 
-    # Wait for setup step to finish
-    wait_free_surface_communication!(free_surface, model, architecture(free_surface_grid))
-
     barotropic_timestepper = free_surface.timestepper
     baroclinic_timestepper = model.timestepper
 
-    stage = model.clock.stage
-
-    # Reset the filtered fields and the barotropic timestepper to zero. 
-    # In case of an RK3 timestepper, reset also the free surface state for the last stage.
-    @apply_regionally initialize_free_surface_state!(free_surface, baroclinic_timestepper, barotropic_timestepper)
-
-    # Calculate the substepping parameterers
+    # Calculate the substepping parameters
     # barotropic time step as fraction of baroclinic step and averaging weights
     Nsubsteps = calculate_substeps(substepping, Δt)
-    fractional_Δt, weights = calculate_adaptive_settings(substepping, Nsubsteps)
+    fractional_Δt, weights, transport_weights = calculate_adaptive_settings(substepping, Nsubsteps)
     Nsubsteps = length(weights)
 
     # barotropic time step in seconds
@@ -160,24 +150,27 @@ function step_free_surface!(free_surface::SplitExplicitFreeSurface, model, baroc
     η = free_surface.η
     U = barotropic_velocities.U
     V = barotropic_velocities.V
-    η̅ = filtered_state.η
-    U̅ = filtered_state.U
-    V̅ = filtered_state.V
+    F = model.forcing.η
 
-    # reset free surface averages
+    # Wait for setup step to finish
+    wait_free_surface_communication!(free_surface, model, architecture(free_surface_grid))
+    
     @apply_regionally begin
+        # Reset the filtered fields and the barotropic timestepper to zero. 
+        initialize_free_surface_state!(free_surface, baroclinic_timestepper, barotropic_timestepper)
+        
         # Solve for the free surface at tⁿ⁺¹
-        iterate_split_explicit!(free_surface, free_surface_grid, GUⁿ, GVⁿ, Δτᴮ, weights, Val(Nsubsteps))
+        iterate_split_explicit!(free_surface, free_surface_grid, GUⁿ, GVⁿ, Δτᴮ, F, model.clock, weights, transport_weights, Val(Nsubsteps))
 
         # Update eta and velocities for the next timestep
         # The halos are updated in the `update_state!` function
         launch!(architecture(free_surface_grid), free_surface_grid, :xy,
-                _update_split_explicit_state!, η, U, V, free_surface_grid, η̅, U̅, V̅)
-
-        # Preparing velocities for the barotropic correction
-        mask_immersed_field!(model.velocities.u)
-        mask_immersed_field!(model.velocities.v)
+                _update_split_explicit_state!, η, U, V, free_surface_grid, filtered_state)
     end
 
+    # Fill all the barotropic state
+    fill_halo_regions!((filtered_state.Ũ, filtered_state.Ṽ); async=true)
+    fill_halo_regions!((U, V, η); async=true)
+
     return nothing
 end
diff --git a/src/Models/HydrostaticFreeSurfaceModels/barotropic_pressure_correction.jl b/src/Models/HydrostaticFreeSurfaceModels/barotropic_pressure_correction.jl
index 781153fd87..5abdd0b6d2 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/barotropic_pressure_correction.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/barotropic_pressure_correction.jl
@@ -1,23 +1,20 @@
 using .SplitExplicitFreeSurfaces: barotropic_split_explicit_corrector!
-import Oceananigans.TimeSteppers: compute_pressure_correction!, make_pressure_correction!
-
-compute_pressure_correction!(::HydrostaticFreeSurfaceModel, Δt) = nothing
 
 #####
 ##### Barotropic pressure correction for models with a free surface
 #####
 
-make_pressure_correction!(model::HydrostaticFreeSurfaceModel, Δt; kwargs...) =
-    make_pressure_correction!(model, model.free_surface, Δt; kwargs...)
+correct_barotropic_mode!(model::HydrostaticFreeSurfaceModel, Δt; kwargs...) =
+    correct_barotropic_mode!(model, model.free_surface, Δt; kwargs...)
 
 # Fallback
-make_pressure_correction!(model, free_surface, Δt; kwargs...) = nothing
+correct_barotropic_mode!(model, free_surface, Δt; kwargs...) = nothing
 
 #####
 ##### Barotropic pressure correction for models with an Implicit free surface
 #####
 
-function make_pressure_correction!(model, ::ImplicitFreeSurface, Δt)
+function correct_barotropic_mode!(model, ::ImplicitFreeSurface, Δt)
 
     launch!(model.architecture, model.grid, :xyz,
             _barotropic_pressure_correction!,
@@ -30,7 +27,7 @@ function make_pressure_correction!(model, ::ImplicitFreeSurface, Δt)
     return nothing
 end
 
-function make_pressure_correction!(model, ::SplitExplicitFreeSurface, Δt)
+function correct_barotropic_mode!(model, ::SplitExplicitFreeSurface, Δt)
     u, v, _ = model.velocities
     grid = model.grid
     barotropic_split_explicit_corrector!(u, v, model.free_surface, grid)
diff --git a/src/Models/HydrostaticFreeSurfaceModels/cache_hydrostatic_free_surface_tendencies.jl b/src/Models/HydrostaticFreeSurfaceModels/cache_hydrostatic_free_surface_tendencies.jl
index 8ada686188..3d904dc0c0 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/cache_hydrostatic_free_surface_tendencies.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/cache_hydrostatic_free_surface_tendencies.jl
@@ -1,14 +1,14 @@
 using KernelAbstractions: @index, @kernel
-
-using Oceananigans.TimeSteppers: _cache_field_tendencies!
-
 using Oceananigans: prognostic_fields
 using Oceananigans.Grids: AbstractGrid
-
 using Oceananigans.Utils: launch!
 
 import Oceananigans.TimeSteppers: cache_previous_tendencies!
 
+#####
+##### Storing previous tendencies for the AB2 update
+#####
+
 """ Store source terms for `η`. """
 @kernel function _cache_free_surface_tendency!(Gη⁻, grid, Gη⁰)
     i, j = @index(Global, NTuple)
@@ -25,6 +25,11 @@ function cache_free_surface_tendency!(::ExplicitFreeSurface, model)
             model.timestepper.Gⁿ.η)
 end
 
+@kernel function _cache_field_tendencies!(G⁻, G⁰)
+    i, j, k = @index(Global, NTuple)
+    @inbounds G⁻[i, j, k] = G⁰[i, j, k]
+end
+
 """ Store previous source terms before updating them. """
 function cache_previous_tendencies!(model::HydrostaticFreeSurfaceModel)
     prognostic_field_names = keys(prognostic_fields(model))
@@ -55,3 +60,32 @@ function cache_previous_tendencies!(model::HydrostaticFreeSurfaceModel)
     return nothing
 end
 
+#####
+##### Storing previous fields for the RK3 update
+#####
+
+# Tracers are multiplied by the vertical coordinate scaling factor
+@kernel function _cache_tracer_fields!(Ψ⁻, grid, Ψⁿ)
+    i, j, k = @index(Global, NTuple)
+    @inbounds Ψ⁻[i, j, k] = Ψⁿ[i, j, k] * σⁿ(i, j, k, grid, Center(), Center(), Center())
+end
+
+function cache_previous_fields!(model::HydrostaticFreeSurfaceModel)
+
+    previous_fields = model.timestepper.Ψ⁻
+    model_fields = prognostic_fields(model)
+    grid = model.grid
+    arch = architecture(grid)
+
+    for name in keys(model_fields)
+        Ψ⁻ = previous_fields[name]
+        Ψⁿ = model_fields[name]
+        if name ∈ keys(model.tracers) # Tracers are stored with the grid scaling
+            launch!(arch, grid, :xyz, _cache_tracer_fields!, Ψ⁻, grid, Ψⁿ)
+        else # Velocities and free surface are stored without the grid scaling
+            parent(Ψ⁻) .= parent(Ψⁿ)
+        end
+    end
+
+    return nothing
+end
diff --git a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_flux_bcs.jl b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_flux_bcs.jl
index d64741642a..02d95180c5 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_flux_bcs.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_flux_bcs.jl
@@ -11,26 +11,26 @@ function compute_flux_bcs!(Gcⁿ, c, arch, args)
     return nothing
 end
 
-""" Apply boundary conditions by adding flux divergences to the right-hand-side. """
-function compute_flux_bc_tendencies!(model::HydrostaticFreeSurfaceModel)
-
-    Gⁿ         = model.timestepper.Gⁿ
-    grid       = model.grid
-    arch       = architecture(grid)
-    velocities = model.velocities
-    tracers    = model.tracers
-    
+@inline function compute_momentum_flux_bcs!(model::HydrostaticFreeSurfaceModel) 
+    Gⁿ   = model.timestepper.Gⁿ
+    grid = model.grid
+    arch = architecture(grid)
     args = (model.clock, fields(model), model.closure, model.buoyancy)
 
-    
-    # Velocity fields
-    for i in (:u, :v)
-        @apply_regionally compute_flux_bcs!(Gⁿ[i], velocities[i], arch, args)
-    end
+    compute_flux_bcs!(Gⁿ.u, model.velocities.u, arch, args)
+    compute_flux_bcs!(Gⁿ.v, model.velocities.v, arch, args)
 
-    # Tracer fields
-    for i in propertynames(tracers)
-        @apply_regionally compute_flux_bcs!(Gⁿ[i], tracers[i], arch, args)
+    return nothing
+end
+
+@inline function compute_tracer_flux_bcs!(model::HydrostaticFreeSurfaceModel) 
+    Gⁿ   = model.timestepper.Gⁿ
+    grid = model.grid
+    arch = architecture(grid)
+    args = (model.clock, fields(model), model.closure, model.buoyancy)
+    
+    for i in propertynames(model.tracers)
+        compute_flux_bcs!(Gⁿ[i], model.tracers[i], arch, args)
     end
 
     return nothing
diff --git a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_buffers.jl b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_buffers.jl
index cea61a687c..f0cc143c60 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_buffers.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_buffers.jl
@@ -1,10 +1,10 @@
 import Oceananigans.Models: compute_buffer_tendencies!
 
 using Oceananigans.Grids: halo_size
-using Oceananigans.DistributedComputations: Distributed, DistributedGrid
+using Oceananigans.DistributedComputations: Distributed, DistributedGrid, AsynchronousDistributed, synchronize_communication!
 using Oceananigans.ImmersedBoundaries: get_active_cells_map, CellMaps
+using Oceananigans.Models: surface_kernel_parameters
 using Oceananigans.Models.NonhydrostaticModels: buffer_tendency_kernel_parameters,
-                                                buffer_p_kernel_parameters,
                                                 buffer_κ_kernel_parameters,
                                                 buffer_parameters
 
@@ -12,31 +12,89 @@ const DistributedActiveInteriorIBG = ImmersedBoundaryGrid{FT, TX, TY, TZ,
                                                           <:DistributedGrid, I, <:CellMaps, S,
                                                           <:Distributed} where {FT, TX, TY, TZ, I, S}
 
+# Fallback
+complete_communication_and_compute_tracer_buffer!(model, grid, arch) = nothing
+complete_communication_and_compute_momentum_buffer!(model, grid, arch) = nothing
+
 # We assume here that top/bottom BC are always synchronized (no partitioning in z)
-function compute_buffer_tendencies!(model::HydrostaticFreeSurfaceModel)
+function complete_communication_and_compute_momentum_buffer!(model::HydrostaticFreeSurfaceModel, ::DistributedGrid, ::AsynchronousDistributed)
     grid = model.grid
     arch = architecture(grid)
 
+    # Synchronize tracers
+    for tracer in model.tracers
+        synchronize_communication!(tracer)
+    end
+    
+    # Synchronize velocities
+    synchronize_communication!(model.velocities.u)
+    synchronize_communication!(model.velocities.v)
+
     w_parameters = buffer_w_kernel_parameters(grid, arch)
-    p_parameters = buffer_p_kernel_parameters(grid, arch)
     κ_parameters = buffer_κ_kernel_parameters(grid, model.closure, arch)
 
-    # We need new values for `w`, `p` and `κ`
-    compute_auxiliaries!(model; w_parameters, p_parameters, κ_parameters)
+    update_vertical_velocities!(model.velocities, grid, model; parameters = w_parameters)
+    update_hydrostatic_pressure!(model.pressure.pHY′, arch, grid, model.buoyancy, model.tracers; parameters = w_parameters)
+    compute_diffusivities!(model.diffusivity_fields, model.closure, model; parameters = κ_parameters)
+    fill_halo_regions!(model.diffusivity_fields; only_local_halos=true)
 
     # parameters for communicating North / South / East / West side
-    compute_buffer_tendency_contributions!(grid, arch, model)
+    @apply_regionally compute_momentum_buffer_contributions!(grid, arch, model)
+
+    return nothing
+end
+
+function compute_momentum_buffer_contributions!(grid, arch, model)
+    kernel_parameters = buffer_tendency_kernel_parameters(grid, arch)
+    compute_hydrostatic_momentum_tendencies!(model, model.velocities, kernel_parameters)
+    return nothing
+end
+
+function compute_momentum_buffer_contributions!(grid::DistributedActiveInteriorIBG, arch, model)
+    maps = grid.interior_active_cells
+
+    for name in (:west_halo_dependent_cells,
+                 :east_halo_dependent_cells,
+                 :south_halo_dependent_cells,
+                 :north_halo_dependent_cells)
+
+        active_cells_map = @inbounds maps[name]
+
+        # If the map == nothing, we don't need to compute the buffer because
+        # the buffer is not adjacent to a processor boundary. 
+        if isnothing(active_cells_map)
+            # We pass `nothing` as parameters since we will use the value in the `active_cells_map` as parameters
+            compute_hydrostatic_momentum_tendencies!(model, model.velocities, nothing; active_cells_map)
+        end
+    end
 
     return nothing
 end
 
-function compute_buffer_tendency_contributions!(grid, arch, model)
+# We assume here that top/bottom BC are always synchronized (no partitioning in z)
+function complete_communication_and_compute_tracer_buffer!(model::HydrostaticFreeSurfaceModel, ::DistributedGrid, ::AsynchronousDistributed)
+    grid = model.grid
+    arch = architecture(grid)
+
+    ũ, ṽ, _ = model.transport_velocities
+    synchronize_communication!(ũ)
+    synchronize_communication!(ṽ)
+    synchronize_communication!(model.free_surface)
+
+    w_parameters = buffer_w_kernel_parameters(grid, arch)
+    update_vertical_velocities!(model.transport_velocities, grid, model; parameters=w_parameters)
+    compute_tracer_buffer_contributions!(grid, arch, model)
+
+    return nothing
+end
+
+function compute_tracer_buffer_contributions!(grid, arch, model)
     kernel_parameters = buffer_tendency_kernel_parameters(grid, arch)
-    compute_hydrostatic_free_surface_tendency_contributions!(model, kernel_parameters)
+    compute_hydrostatic_tracer_tendencies!(model, kernel_parameters)
     return nothing
 end
 
-function compute_buffer_tendency_contributions!(grid::DistributedActiveInteriorIBG, arch, model)
+function compute_tracer_buffer_contributions!(grid::DistributedActiveInteriorIBG, arch, model)
     maps = grid.interior_active_cells
 
     for name in (:west_halo_dependent_cells,
@@ -48,7 +106,9 @@ function compute_buffer_tendency_contributions!(grid::DistributedActiveInteriorI
 
         # If the map == nothing, we don't need to compute the buffer because
         # the buffer is not adjacent to a processor boundary
-        !isnothing(active_cells_map) && compute_hydrostatic_free_surface_tendency_contributions!(model, :xyz; active_cells_map)
+        if !isnothing(active_cells_map)
+            compute_hydrostatic_tracer_tendencies!(model, :xyz; active_cells_map)
+        end
     end
 
     return nothing
@@ -59,15 +119,17 @@ function buffer_w_kernel_parameters(grid, arch)
     Nx, Ny, _ = size(grid)
     Hx, Hy, _ = halo_size(grid)
 
+    xside = isa(grid, XFlatGrid) ? UnitRange(1, Nx) : UnitRange(0, Nx+1)
+    yside = isa(grid, YFlatGrid) ? UnitRange(1, Ny) : UnitRange(0, Ny+1)
+
     # Offsets in tangential direction are == -1 to
     # cover the required corners
-    param_west  = (-Hx+2:1,    0:Ny+1)
-    param_east  = (Nx:Nx+Hx-1, 0:Ny+1)
-    param_south = (0:Nx+1,     -Hy+2:1)
-    param_north = (0:Nx+1,     Ny:Ny+Hy-1)
+    param_west  = (-Hx+2:1,    yside)
+    param_east  = (Nx:Nx+Hx-1, yside)
+    param_south = (xside,     -Hy+2:1)
+    param_north = (xside,     Ny:Ny+Hy-1)
 
     params = (param_west, param_east, param_south, param_north)
 
     return buffer_parameters(params, grid, arch)
 end
-
diff --git a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl
index 4eab930a4a..f076665c88 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl
@@ -13,30 +13,39 @@ using Oceananigans.TurbulenceClosures.TKEBasedVerticalDiffusivities: FlavorOfCAT
 using Oceananigans.ImmersedBoundaries: get_active_cells_map, ActiveInteriorIBG,
                                        linear_index_to_tuple
 
-"""
-    compute_tendencies!(model::HydrostaticFreeSurfaceModel, callbacks)
-
-Calculate the interior and boundary contributions to tendency terms without the
-contribution from non-hydrostatic pressure.
-"""
-function compute_tendencies!(model::HydrostaticFreeSurfaceModel, callbacks)
+function compute_momentum_tendencies!(model::HydrostaticFreeSurfaceModel, callbacks)
 
     grid = model.grid
     arch = architecture(grid)
 
-    # Calculate contributions to momentum and tracer tendencies from fluxes and volume terms in the
-    # interior of the domain. The active cells map restricts the computation to the active cells in the
-    # interior if the grid is _immersed_ and the `active_cells_map` kwarg is active
     active_cells_map = get_active_cells_map(model.grid, Val(:interior))
     kernel_parameters = interior_tendency_kernel_parameters(arch, grid)
 
-    compute_hydrostatic_free_surface_tendency_contributions!(model, kernel_parameters; active_cells_map)
-    complete_communication_and_compute_buffer!(model, grid, arch)
+    compute_hydrostatic_momentum_tendencies!(model, model.velocities, kernel_parameters; active_cells_map)
+    complete_communication_and_compute_momentum_buffer!(model, grid, arch)
+    compute_momentum_flux_bcs!(model)
 
     for callback in callbacks
         callback.callsite isa TendencyCallsite && callback(model)
     end
 
+    return nothing
+end
+
+function compute_tracer_tendencies!(model::HydrostaticFreeSurfaceModel)
+
+    grid = model.grid
+    arch = architecture(grid)
+
+    active_cells_map  = get_active_cells_map(model.grid, Val(:interior))
+    kernel_parameters = interior_tendency_kernel_parameters(arch, grid)
+
+    compute_hydrostatic_tracer_tendencies!(model, kernel_parameters; active_cells_map)
+    complete_communication_and_compute_tracer_buffer!(model, grid, arch)
+    compute_tracer_flux_bcs!(model)
+
+    scale_by_stretching_factor!(model.timestepper.Gⁿ, model.tracers, model.grid)
+
     update_tendencies!(model.biogeochemistry, model)
 
     return nothing
@@ -54,13 +63,11 @@ compute_free_surface_tendency!(grid, model, free_surface) = nothing
 end
 
 """ Store previous value of the source term and compute current source term. """
-function compute_hydrostatic_free_surface_tendency_contributions!(model, kernel_parameters; active_cells_map=nothing)
+function compute_hydrostatic_tracer_tendencies!(model, kernel_parameters; active_cells_map=nothing)
 
     arch = model.architecture
     grid = model.grid
 
-    compute_hydrostatic_momentum_tendencies!(model, model.velocities, kernel_parameters; active_cells_map)
-
     for (tracer_index, tracer_name) in enumerate(propertynames(model.tracers))
 
         @inbounds c_tendency    = model.timestepper.Gⁿ[tracer_name]
@@ -75,7 +82,7 @@ function compute_hydrostatic_free_surface_tendency_contributions!(model, kernel_
                      c_immersed_bc,
                      model.buoyancy,
                      model.biogeochemistry,
-                     model.velocities,
+                     model.transport_velocities,
                      model.free_surface,
                      model.tracers,
                      model.diffusivity_fields,
diff --git a/src/Models/HydrostaticFreeSurfaceModels/compute_vertically_integrated_variables.jl b/src/Models/HydrostaticFreeSurfaceModels/compute_vertically_integrated_variables.jl
deleted file mode 100644
index d9bfe26e3b..0000000000
--- a/src/Models/HydrostaticFreeSurfaceModels/compute_vertically_integrated_variables.jl
+++ /dev/null
@@ -1,39 +0,0 @@
-using Oceananigans.Grids: halo_size
-using Oceananigans.AbstractOperations: Ax, Ay, grid_metric_operation
-# Has to be changed when the regression data is updated
-
-function compute_vertically_integrated_lateral_areas!(∫ᶻ_A)
-
-    # we have to account for halos when calculating Integrated areas, in case
-    # a periodic domain, where it is not guaranteed that ηₙ == ηₙ₊₁
-    # 2 halos (instead of only 1) are necessary to accommodate the preconditioner
-
-    field_grid = ∫ᶻ_A.xᶠᶜᶜ.grid
-
-    Axᶠᶜᶜ = grid_metric_operation((Face, Center, Center), Ax, field_grid)
-    Ayᶜᶠᶜ = grid_metric_operation((Center, Face, Center), Ay, field_grid)
-
-    sum!(∫ᶻ_A.xᶠᶜᶜ, Axᶠᶜᶜ)
-    sum!(∫ᶻ_A.yᶜᶠᶜ, Ayᶜᶠᶜ)
-
-    return nothing
-end
-
-"""
-Compute the vertical integrated volume flux from the bottom to ``z = 0`` (i.e., linear free-surface).
-
-```
-U★ = ∫ᶻ Ax * u★ dz
-V★ = ∫ᶻ Ay * v★ dz
-```
-"""
-### Note - what we really want is RHS = divergence of the vertically integrated volume flux
-###        we can optimize this a bit later to do this all in one go to save using intermediate variables.
-function compute_vertically_integrated_volume_flux!(∫ᶻ_U, velocities)
-
-    # Fill halo regions for predictor velocity.
-    sum!(∫ᶻ_U.u, Ax * velocities.u)
-    sum!(∫ᶻ_U.v, Ay * velocities.v)
-
-    return nothing
-end
diff --git a/src/Models/HydrostaticFreeSurfaceModels/compute_w_from_continuity.jl b/src/Models/HydrostaticFreeSurfaceModels/compute_w_from_continuity.jl
index 2eef2aec6b..4ee880d57f 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/compute_w_from_continuity.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/compute_w_from_continuity.jl
@@ -3,6 +3,16 @@ using Oceananigans.Grids: halo_size, topology
 using Oceananigans.Grids: XFlatGrid, YFlatGrid
 using Oceananigans.Operators: flux_div_xyᶜᶜᶜ, div_xyᶜᶜᶜ, Δzᶜᶜᶜ
 using Oceananigans.ImmersedBoundaries: immersed_cell
+using Oceananigans.Models: surface_kernel_parameters
+
+function update_vertical_velocities!(velocities, grid, model; parameters = surface_kernel_parameters(grid))
+    update_grid_vertical_velocity!(velocities, model, grid, model.vertical_coordinate; parameters)
+    compute_w_from_continuity!(velocities, grid; parameters)
+    return nothing
+end
+
+# A Fallback to be extended for specific ztypes and grid types
+update_grid_vertical_velocity!(velocities, model, grid, ztype; kw...) = nothing
 
 """
     compute_w_from_continuity!(model)
@@ -14,10 +24,10 @@ w^{n+1} = -∫ [∂/∂x (u^{n+1}) + ∂/∂y (v^{n+1})] dz
 ```
 """
 compute_w_from_continuity!(model; kwargs...) =
-    compute_w_from_continuity!(model.velocities, model.architecture, model.grid; kwargs...)
+    compute_w_from_continuity!(model.velocities, model.grid; kwargs...)
 
-compute_w_from_continuity!(velocities, arch, grid; parameters = w_kernel_parameters(grid)) =
-    launch!(arch, grid, parameters, _compute_w_from_continuity!, velocities, grid)
+compute_w_from_continuity!(velocities, grid; parameters = surface_kernel_parameters(grid)) =
+    launch!(architecture(grid), grid, parameters, _compute_w_from_continuity!, velocities, grid)
 
 # If the grid is following the free surface, then the derivative of the moving grid is:
 #
@@ -48,29 +58,13 @@ compute_w_from_continuity!(velocities, arch, grid; parameters = w_kernel_paramet
     Nz = size(grid, 3)
     for k in 2:Nz+1
         δ = flux_div_xyᶜᶜᶜ(i, j, k-1, grid, u, v) * Az⁻¹ᶜᶜᶜ(i, j, k-1, grid)
+        w̃ = Δrᶜᶜᶜ(i, j, k-1, grid) * ∂t_σ(i, j, k-1, grid)
 
         # We do not account for grid changes in immersed cells
-        not_immersed = !immersed_cell(i, j, k-1, grid)
-        w̃ = Δrᶜᶜᶜ(i, j, k-1, grid) * ∂t_σ(i, j, k-1, grid) * not_immersed
+        immersed = immersed_cell(i, j, k-1, grid)
+        w̃ = ifelse(immersed, zero(grid), w̃)
 
         wᵏ -= (δ + w̃)
         @inbounds w[i, j, k] = wᵏ
     end
 end
-
-#####
-##### Size and offsets for the w kernel
-#####
-
-# extend w kernel to compute also the boundaries
-# If Flat, do not calculate on halos!
-@inline function w_kernel_parameters(grid)
-    Nx, Ny, _ = size(grid)
-    Hx, Hy, _ = halo_size(grid)
-    Tx, Ty, _ = topology(grid)
-
-    ii = ifelse(Tx == Flat, 1:Nx, -Hx+2:Nx+Hx-1)
-    jj = ifelse(Ty == Flat, 1:Ny, -Hy+2:Ny+Hy-1)
-
-    return KernelParameters(ii, jj)
-end
diff --git a/src/Models/HydrostaticFreeSurfaceModels/explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/explicit_free_surface.jl
index 43179008b0..a47900913c 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/explicit_free_surface.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/explicit_free_surface.jl
@@ -2,6 +2,8 @@ using Oceananigans.Grids: AbstractGrid
 using Oceananigans.Operators: ∂xᶠᶜᶜ, ∂yᶜᶠᶜ
 using Oceananigans.BoundaryConditions: regularize_field_boundary_conditions
 
+import Oceananigans.DistributedComputations: synchronize_communication! 
+
 using Adapt
 
 """
@@ -35,6 +37,18 @@ function materialize_free_surface(free_surface::ExplicitFreeSurface{Nothing}, ve
     return ExplicitFreeSurface(η, g)
 end
 
+#####
+##### Tendency fields
+#####
+
+function hydrostatic_tendency_fields(velocities, free_surface::ExplicitFreeSurface, grid, tracer_names, bcs)
+    u = XFaceField(grid, boundary_conditions=bcs.u)
+    v = YFaceField(grid, boundary_conditions=bcs.v)
+    η = free_surface_displacement_field(velocities, free_surface, grid)
+    tracers = TracerFields(tracer_names, grid, bcs)
+    return merge((u=u, v=v, η=η), tracers)
+end
+
 #####
 ##### Kernel functions for HydrostaticFreeSurfaceModel
 #####
@@ -49,15 +63,21 @@ end
 ##### Time stepping
 #####
 
-step_free_surface!(free_surface::ExplicitFreeSurface, model, timestepper::QuasiAdamsBashforth2TimeStepper, Δt) =
+# Only the free surface needs to be synchronized
+synchronize_communication!(free_surface::ExplicitFreeSurface) = 
+    synchronize_communication!(free_surface.η)
+
+function step_free_surface!(free_surface::ExplicitFreeSurface, model, timestepper::QuasiAdamsBashforth2TimeStepper, Δt) 
     @apply_regionally explicit_ab2_step_free_surface!(free_surface, model, Δt)
+    fill_halo_regions!(free_surface.η; async=true)
+    return nothing
+end
 
-step_free_surface!(free_surface::ExplicitFreeSurface, model, timestepper::SplitRungeKutta3TimeStepper, Δt) =
+function step_free_surface!(free_surface::ExplicitFreeSurface, model, timestepper::SplitRungeKuttaTimeStepper, Δt)
     @apply_regionally explicit_rk3_step_free_surface!(free_surface, model, Δt)
-
-@inline rk3_coeffs(ts, ::Val{1}) = (1,     0)
-@inline rk3_coeffs(ts, ::Val{2}) = (ts.γ², ts.ζ²)
-@inline rk3_coeffs(ts, ::Val{3}) = (ts.γ³, ts.ζ³)
+    fill_halo_regions!(free_surface.η; async=true)
+    return nothing
+end
 
 explicit_rk3_step_free_surface!(free_surface, model, Δt) = 
     launch!(model.architecture, model.grid, :xy,
diff --git a/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl b/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl
index 7ee0f499a8..0d97e2a1b7 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl
@@ -9,28 +9,72 @@ import Oceananigans.TimeSteppers: ab2_step!
 ##### Step everything
 #####
 
-function ab2_step!(model::HydrostaticFreeSurfaceModel, Δt)
+ab2_step!(model::HydrostaticFreeSurfaceModel, Δt, callbacks) =
+    ab2_step!(model, model.free_surface, model.grid, Δt, callbacks)
 
-    grid = model.grid
+function ab2_step!(model, free_surface, grid, Δt, callbacks)
+    FT = eltype(grid)
+    χ  = convert(FT, model.timestepper.χ)
+    Δt = convert(FT, Δt)
+
+    # Computing Baroclinic and Barotropic tendencies
+    compute_momentum_tendencies!(model, callbacks)
     compute_free_surface_tendency!(grid, model, model.free_surface)
 
+    # Advance the free surface
+    step_free_surface!(model.free_surface, model, model.timestepper, Δt)
+
+    # Computing tracer tendencies
+    compute_transport_velocities!(model, model.free_surface)
+    compute_tracer_tendencies!(model)
+
+    # Advance grid and velocities
+    ab2_step_grid!(model.grid, model, model.vertical_coordinate, Δt, χ)
+    ab2_step_velocities!(model.velocities, model, Δt, χ)
+    
+    # Correct the barotropic mode
+    correct_barotropic_mode!(model, Δt)
+
+    # TODO: fill halo regions for horizontal velocities should be here before the tracer update.   
+    # Finally advance tracers:
+    ab2_step_tracers!(model.tracers, model, Δt, χ)
+
+    return nothing
+end
+
+function ab2_step!(model, ::ImplicitFreeSurface, grid, Δt, callbacks)
     FT = eltype(grid)
     χ  = convert(FT, model.timestepper.χ)
     Δt = convert(FT, Δt)
 
-    # Step locally velocity and tracers
-    @apply_regionally begin
-        scale_by_stretching_factor!(model.timestepper.Gⁿ, model.tracers, model.grid)
-        ab2_step_grid!(model.grid, model, model.vertical_coordinate, Δt, χ)
-        ab2_step_velocities!(model.velocities, model, Δt, χ)
-        ab2_step_tracers!(model.tracers, model, Δt, χ)
-    end
+    # Computing Baroclinic and Barotropic tendencies
+    compute_momentum_tendencies!(model, callbacks)
+    compute_tracer_tendencies!(model)
+    compute_free_surface_tendency!(grid, model, model.free_surface)
+    
+    # Finally Substep! Advance grid, tracers, and momentum
+    ab2_step_grid!(model.grid, model, model.vertical_coordinate, Δt, χ)
+    ab2_step_velocities!(model.velocities, model, Δt, χ)
 
+    # Advance the free surface
     step_free_surface!(model.free_surface, model, model.timestepper, Δt)
+    
+    # Correct the barotropic mode
+    correct_barotropic_mode!(model, Δt)
+
+    # TODO: fill halo regions for horizontal velocities should be here before the tracer update.
+    ab2_step_tracers!(model.tracers, model, Δt, χ)
 
     return nothing
 end
 
+#####
+##### Step grid
+#####
+
+# A Fallback to be extended for specific ztypes and grid types
+ab2_step_grid!(grid, model, ztype, Δt, χ) = nothing
+
 #####
 ##### Step velocities
 #####
diff --git a/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_field_tuples.jl b/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_field_tuples.jl
index c305d7de05..309aa9af76 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_field_tuples.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_field_tuples.jl
@@ -12,13 +12,8 @@ function hydrostatic_tendency_fields(velocities, free_surface, grid, tracer_name
     return merge((u=u, v=v), tracers)
 end
 
-function hydrostatic_tendency_fields(velocities, free_surface::ExplicitFreeSurface, grid, tracer_names, bcs)
-    u = XFaceField(grid, boundary_conditions=bcs.u)
-    v = YFaceField(grid, boundary_conditions=bcs.v)
-    η = free_surface_displacement_field(velocities, free_surface, grid)
-    tracers = TracerFields(tracer_names, grid, bcs)
-    return merge((u=u, v=v, η=η), tracers)
-end
+previous_hydrostatic_tendency_fields(timestepper, args...)         = nothing
+previous_hydrostatic_tendency_fields(timestepper::Symbol, args...) = previous_hydrostatic_tendency_fields(Val(timestepper), args...)
 
 previous_hydrostatic_tendency_fields(::Val{:QuasiAdamsBashforth2}, args...) = hydrostatic_tendency_fields(args...)
-previous_hydrostatic_tendency_fields(::Val{:SplitRungeKutta3}, args...)     = nothing
+previous_hydrostatic_tendency_fields(::Val{:SplitRungeKutta}, args...)      = nothing
diff --git a/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_model.jl b/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_model.jl
index a4cdf31ff7..5124208619 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_model.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_model.jl
@@ -8,10 +8,10 @@ using Oceananigans.BoundaryConditions: regularize_field_boundary_conditions
 using Oceananigans.Biogeochemistry: validate_biogeochemistry, AbstractBiogeochemistry, biogeochemical_auxiliary_fields
 using Oceananigans.Fields: Field, CenterField, tracernames, VelocityFields, TracerFields
 using Oceananigans.Forcings: model_forcing
-using Oceananigans.Grids: AbstractCurvilinearGrid, AbstractHorizontallyCurvilinearGrid, architecture, halo_size, MutableVerticalDiscretization
+using Oceananigans.Grids: AbstractCurvilinearGrid, AbstractHorizontallyCurvilinearGrid, architecture, halo_size, MutableVerticalDiscretization, StaticVerticalDiscretization
 using Oceananigans.ImmersedBoundaries: ImmersedBoundaryGrid
 using Oceananigans.Models: AbstractModel, validate_model_halo, validate_tracer_advection, extract_boundary_conditions, initialization_update_state!
-using Oceananigans.TimeSteppers: Clock, TimeStepper, update_state!, AbstractLagrangianParticles, SplitRungeKutta3TimeStepper
+using Oceananigans.TimeSteppers: Clock, TimeStepper, update_state!, AbstractLagrangianParticles, SplitRungeKuttaTimeStepper
 using Oceananigans.TurbulenceClosures: validate_closure, with_tracers, build_diffusivity_fields, add_closure_specific_boundary_conditions
 using Oceananigans.TurbulenceClosures: time_discretization, implicit_diffusion_solver
 using Oceananigans.Utils: tupleit
@@ -28,36 +28,37 @@ const AbstractBGCOrNothing = Union{Nothing, AbstractBiogeochemistry}
 
 function default_vertical_coordinate(grid)
     if grid.z isa MutableVerticalDiscretization
-        return ZStarCoordinate(grid)
+        return ZStarCoordinate()
     else
         return ZCoordinate()
     end
 end
 
 mutable struct HydrostaticFreeSurfaceModel{TS, E, A<:AbstractArchitecture, S,
-                                           G, T, V, B, R, F, P, BGC, U, C, Φ, K, AF, Z} <: AbstractModel{TS, A}
-
-    architecture :: A           # Computer `Architecture` on which `Model` is run
-    grid :: G                   # Grid of physical points on which `Model` is solved
-    clock :: Clock{T}           # Tracks iteration number and simulation time of `Model`
-    advection :: V              # Advection scheme for tracers
-    buoyancy :: B               # Set of parameters for buoyancy model
-    coriolis :: R               # Set of parameters for the background rotation rate of `Model`
-    free_surface :: S           # Free surface parameters and fields
-    forcing :: F                # Container for forcing functions defined by the user
-    closure :: E                # Diffusive 'turbulence closure' for all model fields
-    particles :: P              # Particle set for Lagrangian tracking
-    biogeochemistry :: BGC      # Biogeochemistry for Oceananigans tracers
-    velocities :: U             # Container for velocity fields `u`, `v`, and `w`
-    tracers :: C                # Container for tracer fields
-    pressure :: Φ               # Container for hydrostatic pressure
-    diffusivity_fields :: K     # Container for turbulent diffusivities
-    timestepper :: TS           # Object containing timestepper fields and parameters
-    auxiliary_fields :: AF      # User-specified auxiliary fields for forcing functions and boundary conditions
-    vertical_coordinate :: Z    # Rulesets that define the time-evolution of the grid
+                                           G, T, V, B, R, F, P, BGC, U, W, C, Φ, K, AF, Z} <: AbstractModel{TS, A}
+
+    architecture :: A          # Computer `Architecture` on which `Model` is run
+    grid :: G                  # Grid of physical points on which `Model` is solved
+    clock :: Clock{T}          # Tracks iteration number and simulation time of `Model`
+    advection :: V             # Advection scheme for tracers
+    buoyancy :: B              # Set of parameters for buoyancy model
+    coriolis :: R              # Set of parameters for the background rotation rate of `Model`
+    free_surface :: S          # Free surface parameters and fields
+    forcing :: F               # Container for forcing functions defined by the user
+    closure :: E               # Diffusive 'turbulence closure' for all model fields
+    particles :: P             # Particle set for Lagrangian tracking
+    biogeochemistry :: BGC     # Biogeochemistry for Oceananigans tracers
+    velocities :: U            # Container for velocity fields `u`, `v`, and `w`
+    transport_velocities :: W  # Container for velocity fields used to transport tracers
+    tracers :: C               # Container for tracer fields
+    pressure :: Φ              # Container for hydrostatic pressure
+    diffusivity_fields :: K    # Container for turbulent diffusivities
+    timestepper :: TS          # Object containing timestepper fields and parameters
+    auxiliary_fields :: AF     # User-specified auxiliary fields for forcing functions and boundary conditions
+    vertical_coordinate :: Z   # Rulesets that define the time-evolution of the grid
 end
 
-default_free_surface(grid::XYRegularRG; gravitational_acceleration=defaults.gravitational_acceleration) =
+default_free_surface(grid::XYRegularStaticRG; gravitational_acceleration=g_Earth) =
     ImplicitFreeSurface(; gravitational_acceleration)
 
 default_free_surface(grid; gravitational_acceleration=defaults.gravitational_acceleration) =
@@ -65,24 +66,24 @@ default_free_surface(grid; gravitational_acceleration=defaults.gravitational_acc
 
 """
     HydrostaticFreeSurfaceModel(; grid,
-                                clock = Clock{Float64}(time = 0),
-                                momentum_advection = VectorInvariant(),
-                                tracer_advection = Centered(),
-                                buoyancy = SeawaterBuoyancy(eltype(grid)),
-                                coriolis = nothing,
-                                free_surface = [default_free_surface],
-                                forcing::NamedTuple = NamedTuple(),
-                                closure = nothing,
-                                timestepper = :QuasiAdamsBashforth2,
-                                boundary_conditions::NamedTuple = NamedTuple(),
-                                tracers = (:T, :S),
-                                particles::ParticlesOrNothing = nothing,
-                                biogeochemistry::AbstractBGCOrNothing = nothing,
-                                velocities = nothing,
-                                pressure = nothing,
-                                diffusivity_fields = nothing,
-                                auxiliary_fields = NamedTuple(),
-                                vertical_coordinate = default_vertical_coordinate(grid))
+                                  clock = Clock{Float64}(time = 0),
+                                  momentum_advection = VectorInvariant(),
+                                  tracer_advection = Centered(),
+                                  buoyancy = SeawaterBuoyancy(eltype(grid)),
+                                  coriolis = nothing,
+                                  free_surface = [default_free_surface],
+                                  forcing::NamedTuple = NamedTuple(),
+                                  closure = nothing,
+                                  timestepper = :QuasiAdamsBashforth2,
+                                  boundary_conditions::NamedTuple = NamedTuple(),
+                                  tracers = (:T, :S),
+                                  particles::ParticlesOrNothing = nothing,
+                                  biogeochemistry::AbstractBGCOrNothing = nothing,
+                                  velocities = nothing,
+                                  pressure = nothing,
+                                  diffusivity_fields = nothing,
+                                  auxiliary_fields = NamedTuple(),
+                                  vertical_coordinate = default_vertical_coordinate(grid))
 
 Construct a hydrostatic model with a free surface on `grid`.
 
@@ -106,7 +107,7 @@ Keyword arguments
   - `forcing`: `NamedTuple` of user-defined forcing functions that contribute to solution tendencies.
   - `closure`: The turbulence closure for `model`. See `Oceananigans.TurbulenceClosures`.
   - `timestepper`: A symbol that specifies the time-stepping method.
-                   Either `:QuasiAdamsBashforth2` (default) or `:SplitRungeKutta3`.
+                   Either `:QuasiAdamsBashforth2` (default) or `:SplitRungeKutta`.
   - `boundary_conditions`: `NamedTuple` containing field boundary conditions.
   - `particles`: Lagrangian particles to be advected with the flow. Default: `nothing`.
   - `biogeochemistry`: Biogeochemical model for `tracers`.
@@ -120,25 +121,25 @@ Keyword arguments
                            `ZCoordinate()`.
 """
 function HydrostaticFreeSurfaceModel(; grid,
-    clock = Clock(grid),
-    momentum_advection = VectorInvariant(),
-    tracer_advection = Centered(),
-    buoyancy = nothing,
-    coriolis = nothing,
-    free_surface = default_free_surface(grid, gravitational_acceleration=defaults.gravitational_acceleration),
-    tracers = nothing,
-    forcing::NamedTuple = NamedTuple(),
-    closure = nothing,
-    timestepper = :QuasiAdamsBashforth2,
-    boundary_conditions::NamedTuple = NamedTuple(),
-    particles::ParticlesOrNothing = nothing,
-    biogeochemistry::AbstractBGCOrNothing = nothing,
-    velocities = nothing,
-    pressure = nothing,
-    diffusivity_fields = nothing,
-    auxiliary_fields = NamedTuple(),
-    vertical_coordinate = default_vertical_coordinate(grid))
-
+                                       clock = Clock(grid),
+                                       momentum_advection = VectorInvariant(),
+                                       tracer_advection = Centered(),
+                                       buoyancy = nothing,
+                                       coriolis = nothing,
+                                       free_surface = default_free_surface(grid, gravitational_acceleration=defaults.gravitational_acceleration),
+                                       tracers = nothing,
+                                       forcing::NamedTuple = NamedTuple(),
+                                       closure = nothing,
+                                       timestepper = :QuasiAdamsBashforth2,
+                                       boundary_conditions::NamedTuple = NamedTuple(),
+                                       particles::ParticlesOrNothing = nothing,
+                                       biogeochemistry::AbstractBGCOrNothing = nothing,
+                                       velocities = nothing,
+                                       pressure = nothing,
+                                       diffusivity_fields = nothing,
+                                       auxiliary_fields = NamedTuple(),
+                                       vertical_coordinate = default_vertical_coordinate(grid))
+                                   
     # Check halos and throw an error if the grid's halo is too small
     @apply_regionally validate_model_halo(grid, momentum_advection, tracer_advection, closure)
 
@@ -218,24 +219,31 @@ function HydrostaticFreeSurfaceModel(; grid,
     prognostic_fields = hydrostatic_prognostic_fields(velocities, free_surface, tracers)
 
     Gⁿ = hydrostatic_tendency_fields(velocities, free_surface, grid, tracernames(tracers), boundary_conditions)
-    G⁻ = previous_hydrostatic_tendency_fields(Val(timestepper), velocities, free_surface, grid, tracernames(tracers), boundary_conditions)
+    G⁻ = previous_hydrostatic_tendency_fields(timestepper, velocities, free_surface, grid, tracernames(tracers), boundary_conditions)
     timestepper = TimeStepper(timestepper, grid, prognostic_fields; implicit_solver, Gⁿ, G⁻)
 
     # Regularize forcing for model tracer and velocity fields.
     model_fields = merge(prognostic_fields, auxiliary_fields)
     forcing = model_forcing(model_fields; forcing...)
+    transport_velocities = transport_velocity_fields(velocities, free_surface)
 
     !isnothing(particles) && arch isa Distributed && error("LagrangianParticles are not supported on Distributed architectures.")
 
     model = HydrostaticFreeSurfaceModel(arch, grid, clock, advection, buoyancy, coriolis,
-                                        free_surface, forcing, closure, particles, biogeochemistry, velocities, tracers,
-                                        pressure, diffusivity_fields, timestepper, auxiliary_fields, vertical_coordinate)
+                                        free_surface, forcing, closure, particles, biogeochemistry, velocities, transport_velocities, 
+                                        tracers, pressure, diffusivity_fields, timestepper, auxiliary_fields, vertical_coordinate)
 
-    initialization_update_state!(model; compute_tendencies=false)
+    initialization_update_state!(model)
 
     return model
 end
 
+transport_velocity_fields(velocities, free_surface) = velocities
+transport_velocity_fields(velocities, ::SplitExplicitFreeSurface) = 
+    (u = XFaceField(velocities.u.grid; boundary_conditions=velocities.u.boundary_conditions), 
+     v = YFaceField(velocities.v.grid; boundary_conditions=velocities.v.boundary_conditions),
+     w = ZFaceField(velocities.w.grid; boundary_conditions=velocities.w.boundary_conditions))
+
 validate_velocity_boundary_conditions(grid, velocities) = validate_vertical_velocity_boundary_conditions(velocities.w)
 
 function validate_vertical_velocity_boundary_conditions(w)
diff --git a/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_rk3_step.jl b/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_rk3_step.jl
deleted file mode 100644
index ca2f2b505d..0000000000
--- a/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_rk3_step.jl
+++ /dev/null
@@ -1,143 +0,0 @@
-using Oceananigans.Fields: location, instantiated_location
-using Oceananigans.TurbulenceClosures: implicit_step!
-using Oceananigans.ImmersedBoundaries: get_active_cells_map, get_active_column_map
-
-import Oceananigans.TimeSteppers: split_rk3_substep!, _euler_substep_field!, cache_previous_fields!
-
-function split_rk3_substep!(model::HydrostaticFreeSurfaceModel, Δt)
-
-    grid         = model.grid
-    timestepper  = model.timestepper
-    free_surface = model.free_surface
-
-    compute_free_surface_tendency!(grid, model, free_surface)
-
-    @apply_regionally begin
-        scale_by_stretching_factor!(model.timestepper.Gⁿ, model.tracers, model.grid)
-        rk3_substep_grid!(grid, model, model.vertical_coordinate, Δt)
-        rk3_substep_velocities!(model.velocities, model, Δt)
-        rk3_substep_tracers!(model.tracers, model, Δt)
-    end
-
-    step_free_surface!(free_surface, model, timestepper, Δt)
-
-    return nothing
-end
-
-#####
-##### Time stepping in each substep
-#####
-
-function rk3_substep_velocities!(velocities, model, Δt)
-
-    grid = model.grid
-    FT = eltype(grid)
-
-    for name in (:u, :v)
-        Gⁿ = model.timestepper.Gⁿ[name]
-        Ψ⁻ = model.timestepper.Ψ⁻[name]
-        velocity_field = velocities[name]
-
-        launch!(architecture(grid), grid, :xyz,
-                _euler_substep_field!, velocity_field, convert(FT, Δt), Gⁿ, Ψ⁻)
-
-        implicit_step!(velocity_field,
-                       model.timestepper.implicit_solver,
-                       model.closure,
-                       model.diffusivity_fields,
-                       nothing,
-                       model.clock, 
-                       fields(model),
-                       Δt)
-    end
-
-    return nothing
-end
-
-#####
-##### Step Tracers
-#####
-
-rk3_substep_tracers!(::EmptyNamedTuple, model, Δt) = nothing
-
-function rk3_substep_tracers!(tracers, model, Δt)
-
-    closure = model.closure
-    grid = model.grid
-    FT = eltype(grid)
-
-    catke_in_closures = hasclosure(closure, FlavorOfCATKE)
-
-    # Tracer update kernels
-    for (tracer_index, tracer_name) in enumerate(propertynames(tracers))
-        if catke_in_closures && tracer_name == :e
-            @debug "Skipping RK3 step for e"
-        else
-            Gⁿ = model.timestepper.Gⁿ[tracer_name]
-            Ψ⁻ = model.timestepper.Ψ⁻[tracer_name]
-            c  = tracers[tracer_name]
-            closure = model.closure
-
-            launch!(architecture(grid), grid, :xyz,
-                    _euler_substep_tracer_field!, c, grid, convert(FT, Δt), Gⁿ, Ψ⁻)
-
-            implicit_step!(c,
-                           model.timestepper.implicit_solver,
-                           closure,
-                           model.diffusivity_fields,
-                           Val(tracer_index),
-                           model.clock,
-                           fields(model),
-                           Δt)
-        end
-    end
-
-    return nothing
-end
-
-#####
-##### Tracer update in mutable vertical coordinates
-#####
-
-# σc is the evolved quantity, so tracer fields need to be evolved
-# accounting for the stretching factors from the new and the previous time step.
-@kernel function _euler_substep_tracer_field!(c, grid, Δt, Gⁿ, σc⁻)
-    i, j, k = @index(Global, NTuple)
-    σᶜᶜⁿ = σⁿ(i, j, k, grid, Center(), Center(), Center())
-    @inbounds c[i, j, k] = (σc⁻[i, j, k] + Δt * Gⁿ[i, j, k]) / σᶜᶜⁿ
-end
-
-#####
-##### Storing previous fields for the RK3 update
-#####
-
-# Tracers are multiplied by the vertical coordinate scaling factor
-@kernel function _cache_tracer_fields!(Ψ⁻, grid, Ψⁿ)
-    i, j, k = @index(Global, NTuple)
-    @inbounds Ψ⁻[i, j, k] = Ψⁿ[i, j, k] * σⁿ(i, j, k, grid, Center(), Center(), Center())
-end
-
-function cache_previous_fields!(model::HydrostaticFreeSurfaceModel)
-
-    previous_fields = model.timestepper.Ψ⁻
-    model_fields = prognostic_fields(model)
-    grid = model.grid
-    arch = architecture(grid)
-
-    for name in keys(model_fields)
-        Ψ⁻ = previous_fields[name]
-        Ψⁿ = model_fields[name]
-        if name ∈ keys(model.tracers) # Tracers are stored with the grid scaling
-            launch!(arch, grid, :xyz, _cache_tracer_fields!, Ψ⁻, grid, Ψⁿ)
-        else # Velocities and free surface are stored without the grid scaling
-            parent(Ψ⁻) .= parent(Ψⁿ)
-        end
-    end
-
-    if grid isa MutableGridOfSomeKind && model.vertical_coordinate isa ZStarCoordinate
-        # We need to cache the surface height somewhere!
-        parent(model.vertical_coordinate.storage) .= parent(model.grid.z.ηⁿ)
-    end
-
-    return nothing
-end
diff --git a/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_rk_step.jl b/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_rk_step.jl
new file mode 100644
index 0000000000..13bf3a0eff
--- /dev/null
+++ b/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_rk_step.jl
@@ -0,0 +1,166 @@
+using Oceananigans.Fields: location, instantiated_location
+using Oceananigans.TurbulenceClosures: implicit_step!
+using Oceananigans.ImmersedBoundaries: get_active_cells_map, get_active_column_map
+
+import Oceananigans.TimeSteppers: rk_substep!, cache_previous_fields!
+
+rk_substep!(model::HydrostaticFreeSurfaceModel, Δτ, callbacks) =
+    rk_substep!(model, model.free_surface, model.grid, Δτ, callbacks)
+
+# RK3 substep for hydrostatic free surface models, it differs in the order of operations
+# depending on the type of free surface (implicit or explicit)
+#
+# For explicit free surfaces (`ExplicitFreeSurface` and `SplitExplicitFreeSurface`), we first
+# compute the free surface using the integrated momentum baroclinic tendencies,
+# then we advance grid, momentum and tracers. The last step is to reconcile the baroclinic and
+# the barotropic modes by applying a pressure correction to momentum.
+@inline function rk_substep!(model, free_surface, grid, Δτ, callbacks)
+    # Compute barotropic and baroclinic tendencies
+    compute_momentum_tendencies!(model, callbacks)
+    compute_free_surface_tendency!(grid, model, free_surface)
+
+    # Advance the free surface first
+    step_free_surface!(free_surface, model, model.timestepper, Δτ)
+
+    # Compute z-dependent transport velocities
+    compute_transport_velocities!(model, free_surface)
+
+    # compute tracer tendencies
+    compute_tracer_tendencies!(model)
+
+    # Advance grid and velocities
+    rk_substep_grid!(grid, model, model.vertical_coordinate, Δτ)
+    rk_substep_velocities!(model.velocities, model, Δτ)
+
+    # Correct for the updated barotropic mode
+    correct_barotropic_mode!(model, Δτ)
+
+    # TODO: fill halo regions for horizontal velocities should be here before the tracer update.   
+    rk_substep_tracers!(model.tracers, model, Δτ)
+
+    return nothing
+end
+
+# For implicit free surfaces (`ImplicitFreeSurface`), we first advance grid and tracers,
+# we then use a predictor-corrector approach to advance momentum, in which we first
+# advance momentum neglecting the free surface contribution, then, after the computation of
+# the new free surface, we correct momentum to account for the updated free surface.
+@inline function rk_substep!(model, ::ImplicitFreeSurface, grid, Δτ, callbacks)
+
+    # Computing tendencies...
+    compute_momentum_tendencies!(model, callbacks)
+    compute_tracer_tendencies!(model)
+    compute_free_surface_tendency!(grid, model, model.free_surface)
+    
+    # Finally Substep! Advance grid, tracers, (predictor) momentum 
+    rk_substep_grid!(grid, model, model.vertical_coordinate, Δτ)
+    rk_substep_velocities!(model.velocities, model, Δτ)
+
+    # Advancing free surface in preparation for the correction step
+    step_free_surface!(model.free_surface, model, model.timestepper, Δτ)
+
+    # Correct for the updated barotropic mode
+    correct_barotropic_mode!(model, Δτ)
+
+    # TODO: fill halo regions for horizontal velocities should be here before the tracer update.   
+    rk_substep_tracers!(model.tracers, model, Δτ)
+
+    return nothing
+end
+
+#####
+##### Step grid
+#####
+
+# A Fallback to be extended for specific ztypes and grid types
+rk_substep_grid!(grid, model, ztype, Δt) = nothing
+
+#####
+##### Step Velocities
+#####
+
+function rk_substep_velocities!(velocities, model, Δt)
+
+    grid = model.grid
+    FT = eltype(grid)
+
+    for name in (:u, :v)
+        Gⁿ = model.timestepper.Gⁿ[name]
+        Ψ⁻ = model.timestepper.Ψ⁻[name]
+        velocity_field = velocities[name]
+
+        launch!(architecture(grid), grid, :xyz,
+                _euler_substep_field!, velocity_field, convert(FT, Δt), Gⁿ, Ψ⁻)
+
+        implicit_step!(velocity_field,
+                       model.timestepper.implicit_solver,
+                       model.closure,
+                       model.diffusivity_fields,
+                       nothing,
+                       model.clock,
+                       fields(model),
+                       Δt)
+    end
+
+    return nothing
+end
+
+#####
+##### Step Tracers
+#####
+
+rk_substep_tracers!(::EmptyNamedTuple, model, Δt) = nothing
+
+function rk_substep_tracers!(tracers, model, Δt)
+
+    closure = model.closure
+    grid = model.grid
+    FT = eltype(grid)
+
+    catke_in_closures = hasclosure(closure, FlavorOfCATKE)
+
+    # Tracer update kernels
+    for (tracer_index, tracer_name) in enumerate(propertynames(tracers))
+
+        if catke_in_closures && tracer_name == :e
+            @debug "Skipping RK3 step for e"
+        else
+            Gⁿ = model.timestepper.Gⁿ[tracer_name]
+            Ψ⁻ = model.timestepper.Ψ⁻[tracer_name]
+            c  = tracers[tracer_name]
+            closure = model.closure
+
+            launch!(architecture(grid), grid, :xyz,
+                    _euler_substep_tracer_field!, c, grid, convert(FT, Δt), Gⁿ, Ψ⁻)
+
+            implicit_step!(c,
+                           model.timestepper.implicit_solver,
+                           closure,
+                           model.diffusivity_fields,
+                           Val(tracer_index),
+                           model.clock,
+                           fields(model),
+                           Δt)
+        end
+    end
+
+    return nothing
+end
+
+#####
+##### Tracer update in mutable vertical coordinates
+#####
+
+# Velocity evolution kernel
+@kernel function _euler_substep_field!(field, Δt, Gⁿ, Ψ⁻)
+    i, j, k = @index(Global, NTuple)
+    @inbounds field[i, j, k] = Ψ⁻[i, j, k] + Δt * Gⁿ[i, j, k]
+end
+
+# σc is the evolved quantity, so tracer fields need to be evolved
+# accounting for the stretching factors from the new and the previous time step.
+@kernel function _euler_substep_tracer_field!(c, grid, Δt, Gⁿ, σc⁻)
+    i, j, k = @index(Global, NTuple)
+    σᶜᶜⁿ = σⁿ(i, j, k, grid, Center(), Center(), Center())
+    @inbounds c[i, j, k] = (σc⁻[i, j, k] + Δt * Gⁿ[i, j, k]) / σᶜᶜⁿ
+end
diff --git a/src/Models/HydrostaticFreeSurfaceModels/implicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/implicit_free_surface.jl
index 7c521b9498..2bf07055e6 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/implicit_free_surface.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/implicit_free_surface.jl
@@ -1,12 +1,14 @@
 using Oceananigans.Grids: AbstractGrid
 using Oceananigans.Architectures: device
-using Oceananigans.Operators: ∂xᶠᶜᶜ, ∂yᶜᶠᶜ, Δzᶜᶜᶠ, Δzᶜᶜᶜ
+using Oceananigans.Operators: ∂xᶠᶜᶜ, ∂yᶜᶠᶜ, Δzᶜᶜᶠ, Δzᶜᶜᶜ, Δx, Δy
 using Oceananigans.BoundaryConditions: regularize_field_boundary_conditions
 using Oceananigans.Solvers: solve!
 using Oceananigans.Utils: prettysummary
 using Oceananigans.Fields
 using Oceananigans.Utils: prettytime
 
+using Oceananigans.Models.HydrostaticFreeSurfaceModels.SplitExplicitFreeSurfaces: compute_barotropic_mode!
+
 using Adapt
 
 struct ImplicitFreeSurface{E, G, B, I, M, S} <: AbstractFreeSurface{E, G}
@@ -117,7 +119,7 @@ function materialize_free_surface(free_surface::ImplicitFreeSurface{Nothing}, ve
                                free_surface.solver_settings)
 end
 
-build_implicit_step_solver(::Val{:Default}, grid::XYRegularRG, settings, gravitational_acceleration) =
+build_implicit_step_solver(::Val{:Default}, grid::XYRegularStaticRG, settings, gravitational_acceleration) =
     build_implicit_step_solver(Val(:FastFourierTransform), grid, settings, gravitational_acceleration)
 
 build_implicit_step_solver(::Val{:Default}, grid, settings, gravitational_acceleration) =
@@ -140,7 +142,7 @@ function step_free_surface!(free_surface::ImplicitFreeSurface, model, timesteppe
     fill_halo_regions!(model.velocities, model.clock, fields(model))
 
     # Compute right hand side of implicit free surface equation
-    @apply_regionally local_compute_integrated_volume_flux!(∫ᶻQ, model.velocities, arch)
+    @apply_regionally local_compute_integrated_variables!(∫ᶻQ, model.velocities, arch)
     fill_halo_regions!(∫ᶻQ)
 
     compute_implicit_free_surface_right_hand_side!(rhs, solver, g, Δt, ∫ᶻQ, η)
@@ -157,18 +159,21 @@ function step_free_surface!(free_surface::ImplicitFreeSurface, model, timesteppe
     return nothing
 end
 
-function step_free_surface!(free_surface::ImplicitFreeSurface, model, timestepper::SplitRungeKutta3TimeStepper, Δt)
+function step_free_surface!(free_surface::ImplicitFreeSurface, model, timestepper::SplitRungeKuttaTimeStepper, Δt)
     parent(free_surface.η) .= parent(timestepper.Ψ⁻.η)
     step_free_surface!(free_surface, model, nothing, Δt)
     return nothing
 end
 
-function local_compute_integrated_volume_flux!(∫ᶻQ, velocities, arch)
+function local_compute_integrated_variables!(∫ᶻQ, velocities, arch)
+    u, v, _ = velocities
+    U, V = ∫ᶻQ
 
-    foreach(mask_immersed_field!, velocities)
+    grid = u.grid
 
-    # Compute barotropic volume flux. Blocking.
-    compute_vertically_integrated_volume_flux!(∫ᶻQ, velocities)
+    # Compute barotropic volume flux. 
+    foreach(mask_immersed_field!, velocities)
+    compute_barotropic_mode!(U, V, grid, u * Δy, v * Δx)
 
     return nothing
-end
+end
\ No newline at end of file
diff --git a/src/Models/HydrostaticFreeSurfaceModels/implicit_free_surface_utils.jl b/src/Models/HydrostaticFreeSurfaceModels/implicit_free_surface_utils.jl
deleted file mode 100644
index a0879c6bb8..0000000000
--- a/src/Models/HydrostaticFreeSurfaceModels/implicit_free_surface_utils.jl
+++ /dev/null
@@ -1,7 +0,0 @@
-"""
-Compute the horizontal divergence of vertically-uniform quantity using
-vertically-integrated face areas `∫ᶻ_Axᶠᶜᶜ` and `∫ᶻ_Ayᶜᶠᶜ`.
-"""
-@inline Az_∇h²ᶜᶜᶜ(i, j, k, grid, ∫ᶻ_Axᶠᶜᶜ, ∫ᶻ_Ayᶜᶠᶜ, η) =
-    (δxᶜᵃᵃ(i, j, k, grid, ∫ᶻ_Ax_∂x_ηᶠᶜᶜ, ∫ᶻ_Axᶠᶜᶜ, η) +
-     δyᵃᶜᵃ(i, j, k, grid, ∫ᶻ_Ay_∂y_ηᶜᶠᶜ, ∫ᶻ_Ayᶜᶠᶜ, η))
diff --git a/src/Models/HydrostaticFreeSurfaceModels/pcg_implicit_free_surface_solver.jl b/src/Models/HydrostaticFreeSurfaceModels/pcg_implicit_free_surface_solver.jl
index a91802fa8d..dfa3e756f8 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/pcg_implicit_free_surface_solver.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/pcg_implicit_free_surface_solver.jl
@@ -16,17 +16,18 @@ The preconditioned conjugate gradient iterative implicit free-surface solver.
 $(TYPEDFIELDS)
 """
 struct PCGImplicitFreeSurfaceSolver{V, S, R}
-    "The vertically-integrated lateral areas"
     vertically_integrated_lateral_areas :: V
-    "The preconditioned conjugate gradient solver"
     preconditioned_conjugate_gradient_solver :: S
-    "The right hand side of the free surface evolution equation"
     right_hand_side :: R
 end
 
 architecture(solver::PCGImplicitFreeSurfaceSolver) =
     architecture(solver.preconditioned_conjugate_gradient_solver)
 
+# The assumption is that the horizontal spacings do not depend on the z-direction
+@inline integrated_x_area(i, j, k, grid) = column_depthᶠᶜᵃ(i, j, grid) * Δyᶠᶜᵃ(i, j, 1, grid)
+@inline integrated_y_area(i, j, k, grid) = column_depthᶜᶠᵃ(i, j, grid) * Δxᶜᶠᵃ(i, j, 1, grid)
+
 """
     PCGImplicitFreeSurfaceSolver(grid, settings)
 
@@ -44,17 +45,11 @@ step `Δt`, gravitational acceleration `g`, and free surface at time-step `n` `
 function PCGImplicitFreeSurfaceSolver(grid::AbstractGrid, settings, gravitational_acceleration=nothing)
 
     # Initialize vertically integrated lateral face areas
-    ∫ᶻ_Axᶠᶜᶜ = Field{Face, Center, Nothing}(grid)
-    ∫ᶻ_Ayᶜᶠᶜ = Field{Center, Face, Nothing}(grid)
+    ∫ᶻ_Axᶠᶜᶜ = KernelFunctionOperation{Face, Center, Nothing}(integrated_x_area, grid)
+    ∫ᶻ_Ayᶜᶠᶜ = KernelFunctionOperation{Center, Face, Nothing}(integrated_y_area, grid)
 
     vertically_integrated_lateral_areas = (xᶠᶜᶜ = ∫ᶻ_Axᶠᶜᶜ, yᶜᶠᶜ = ∫ᶻ_Ayᶜᶠᶜ)
 
-    @apply_regionally compute_vertically_integrated_lateral_areas!(vertically_integrated_lateral_areas)
-
-    Ax = vertically_integrated_lateral_areas.xᶠᶜᶜ
-    Ay = vertically_integrated_lateral_areas.yᶜᶠᶜ
-    fill_halo_regions!((Ax, Ay); signed=false)
-
     # Set some defaults
     settings = Dict{Symbol, Any}(settings)
     settings[:maxiter] = get(settings, :maxiter, grid.Nx * grid.Ny)
@@ -182,6 +177,14 @@ where  ̂ indicates a vertical integral, and
     @inbounds L_ηⁿ⁺¹[i, j, k_top] = Az_∇h²ᶜᶜᶜ(i, j, k_top, grid, ∫ᶻ_Axᶠᶜᶜ, ∫ᶻ_Ayᶜᶠᶜ, ηⁿ⁺¹) - Az * ηⁿ⁺¹[i, j, k_top] / (g * Δt^2)
 end
 
+"""
+Compute the horizontal divergence of vertically-uniform quantity using
+vertically-integrated face areas `∫ᶻ_Axᶠᶜᶜ` and `∫ᶻ_Ayᶜᶠᶜ`.
+"""
+@inline Az_∇h²ᶜᶜᶜ(i, j, k, grid, ∫ᶻ_Axᶠᶜᶜ, ∫ᶻ_Ayᶜᶠᶜ, η) =
+    (δxᶜᵃᵃ(i, j, k, grid, ∫ᶻ_Ax_∂x_ηᶠᶜᶜ, ∫ᶻ_Axᶠᶜᶜ, η) +
+     δyᵃᶜᵃ(i, j, k, grid, ∫ᶻ_Ay_∂y_ηᶜᶠᶜ, ∫ᶻ_Ayᶜᶠᶜ, η))
+
 #####
 ##### Preconditioners
 #####
diff --git a/src/Models/HydrostaticFreeSurfaceModels/prescribed_hydrostatic_velocity_fields.jl b/src/Models/HydrostaticFreeSurfaceModels/prescribed_hydrostatic_velocity_fields.jl
index 248f4c8b43..cd0ee92be5 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/prescribed_hydrostatic_velocity_fields.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/prescribed_hydrostatic_velocity_fields.jl
@@ -75,8 +75,8 @@ hydrostatic_tendency_fields(::PrescribedVelocityFields, free_surface, grid, trac
 free_surface_names(free_surface, ::PrescribedVelocityFields, grid) = tuple()
 free_surface_names(::SplitExplicitFreeSurface, ::PrescribedVelocityFields, grid) = tuple()
 
-@inline fill_halo_regions!(::PrescribedVelocityFields, args...) = nothing
-@inline fill_halo_regions!(::FunctionField, args...) = nothing
+@inline fill_halo_regions!(::PrescribedVelocityFields, args...; kwargs...) = nothing
+@inline fill_halo_regions!(::FunctionField, args...; kwargs...) = nothing
 
 @inline datatuple(obj::PrescribedVelocityFields) = (; u = datatuple(obj.u), v = datatuple(obj.v), w = datatuple(obj.w))
 @inline velocities(obj::PrescribedVelocityFields) = (u = obj.u, v = obj.v, w = obj.w)
@@ -90,9 +90,10 @@ free_surface_names(::SplitExplicitFreeSurface, ::PrescribedVelocityFields, grid)
 @inline sum_of_velocities(U1, U2, U3::PrescribedVelocityFields) = sum_of_velocities(U1, U2, velocities(U3))
 
 ab2_step_velocities!(::PrescribedVelocityFields, args...) = nothing
-rk3_substep_velocities!(::PrescribedVelocityFields, args...) = nothing
+rk_substep_velocities!(::PrescribedVelocityFields, args...) = nothing
 step_free_surface!(::Nothing, model, timestepper, Δt) = nothing
 compute_w_from_continuity!(::PrescribedVelocityFields, args...; kwargs...) = nothing
+mask_immersed_velocities!(::PrescribedVelocityFields) = nothing
 
 validate_velocity_boundary_conditions(grid, ::PrescribedVelocityFields) = nothing
 extract_boundary_conditions(::PrescribedVelocityFields) = NamedTuple()
@@ -123,8 +124,8 @@ on_architecture(to, velocities::PrescribedVelocityFields) =
                              on_architecture(to, velocities.parameters))
 
 # If the model only tracks particles... do nothing but that!!!
-const OnlyParticleTrackingModel = HydrostaticFreeSurfaceModel{TS, E, A, S, G, T, V, B, R, F, P, U, C} where
-                 {TS, E, A, S, G, T, V, B, R, F, P<:AbstractLagrangianParticles, U<:PrescribedVelocityFields, C<:NamedTuple{(), Tuple{}}}
+const OnlyParticleTrackingModel = HydrostaticFreeSurfaceModel{TS, E, A, S, G, T, V, B, R, F, P, U, W, C} where
+                 {TS, E, A, S, G, T, V, B, R, F, P<:AbstractLagrangianParticles, U<:PrescribedVelocityFields, W<:PrescribedVelocityFields, C<:NamedTuple{(), Tuple{}}}
 
 function time_step!(model::OnlyParticleTrackingModel, Δt; callbacks = [], kwargs...)
     tick!(model.clock, Δt)
diff --git a/src/Models/HydrostaticFreeSurfaceModels/set_hydrostatic_free_surface_model.jl b/src/Models/HydrostaticFreeSurfaceModels/set_hydrostatic_free_surface_model.jl
index e256f653dc..d0685450b9 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/set_hydrostatic_free_surface_model.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/set_hydrostatic_free_surface_model.jl
@@ -66,7 +66,7 @@ model.velocities.u
     end
 
     # initialize!(model)
-    initialization_update_state!(model; compute_tendencies=false)
+    initialization_update_state!(model)
 
     return nothing
 end
diff --git a/src/Models/HydrostaticFreeSurfaceModels/single_column_model_mode.jl b/src/Models/HydrostaticFreeSurfaceModels/single_column_model_mode.jl
index 986f117abe..3f2d5c7e93 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/single_column_model_mode.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/single_column_model_mode.jl
@@ -52,8 +52,8 @@ validate_momentum_advection(momentum_advection, ::SingleColumnGrid) = nothing
 validate_tracer_advection(tracer_advection_tuple::NamedTuple, ::SingleColumnGrid) = Centered(), tracer_advection_tuple
 validate_tracer_advection(tracer_advection::AbstractAdvectionScheme, ::SingleColumnGrid) = tracer_advection, NamedTuple()
 
-compute_w_from_continuity!(velocities, arch, ::SingleColumnGrid; kwargs...) = nothing
-compute_w_from_continuity!(::PrescribedVelocityFields, arch, ::SingleColumnGrid; kwargs...) = nothing
+compute_w_from_continuity!(velocities, ::SingleColumnGrid; kwargs...) = nothing
+compute_w_from_continuity!(::PrescribedVelocityFields, ::SingleColumnGrid; kwargs...) = nothing
 
 #####
 ##### Time-step optimizations
@@ -65,7 +65,7 @@ compute_free_surface_tendency!(::SingleColumnGrid, model, ::SplitExplicitFreeSur
 
 # Fast state update and halo filling
 
-function update_state!(model::HydrostaticFreeSurfaceModel, grid::SingleColumnGrid, callbacks; compute_tendencies=true)
+function update_state!(model::HydrostaticFreeSurfaceModel, grid::SingleColumnGrid, callbacks)
 
     fill_halo_regions!(prognostic_fields(model), model.clock, fields(model))
 
@@ -83,9 +83,6 @@ function update_state!(model::HydrostaticFreeSurfaceModel, grid::SingleColumnGri
 
     update_biogeochemical_state!(model.biogeochemistry, model)
 
-    compute_tendencies &&
-        @apply_regionally compute_tendencies!(model, callbacks)
-
     return nothing
 end
 
diff --git a/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl b/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl
index 16587f9d77..69a60e5ec2 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl
@@ -7,7 +7,7 @@ using Oceananigans.BoundaryConditions: update_boundary_conditions!
 using Oceananigans.TurbulenceClosures: compute_diffusivities!
 using Oceananigans.ImmersedBoundaries: mask_immersed_field!, mask_immersed_field_xy!, inactive_node
 using Oceananigans.Models: update_model_field_time_series!
-using Oceananigans.Models.NonhydrostaticModels: update_hydrostatic_pressure!, p_kernel_parameters
+using Oceananigans.Models.NonhydrostaticModels: update_hydrostatic_pressure!, surface_kernel_parameters
 
 import Oceananigans.Models.NonhydrostaticModels: compute_auxiliaries!
 import Oceananigans.TimeSteppers: update_state!
@@ -18,30 +18,36 @@ compute_auxiliary_fields!(auxiliary_fields) = Tuple(compute!(a) for a in auxilia
 # single column models.
 
 """
-    update_state!(model::HydrostaticFreeSurfaceModel, callbacks=[]; compute_tendencies = true)
+    update_state!(model::HydrostaticFreeSurfaceModel, callbacks=[])
 
 Update peripheral aspects of the model (auxiliary fields, halo regions, diffusivities,
 hydrostatic pressure) to the current model state. If `callbacks` are provided (in an array),
-they are called in the end. Finally, the tendencies for the new time-step are computed if
-`compute_tendencies = true`.
+they are called in the end. 
 """
-update_state!(model::HydrostaticFreeSurfaceModel, callbacks=[]; compute_tendencies = true) =
-         update_state!(model, model.grid, callbacks; compute_tendencies)
+update_state!(model::HydrostaticFreeSurfaceModel, callbacks=[]) =  update_state!(model, model.grid, callbacks)
 
-function update_state!(model::HydrostaticFreeSurfaceModel, grid, callbacks; compute_tendencies = true)
+function update_state!(model::HydrostaticFreeSurfaceModel, grid, callbacks)
 
-    @apply_regionally mask_immersed_model_fields!(model, grid)
-
-    # Update possible FieldTimeSeries used in the model
-    @apply_regionally update_model_field_time_series!(model, model.clock)
+    arch = architecture(grid)
 
-    # Update the boundary conditions
-    @apply_regionally update_boundary_conditions!(fields(model), model)
+    @apply_regionally begin
+        mask_immersed_model_fields!(model, grid)
+        update_model_field_time_series!(model, model.clock)
+        update_boundary_conditions!(fields(model), model)
+    end
+    
+    u = model.velocities.u
+    v = model.velocities.v
+    tracers = model.tracers
 
     # Fill the halos
-    fill_halo_regions!(prognostic_fields(model), model.grid, model.clock, fields(model); async=true)
+    fill_halo_regions!((u, v),  grid, model.clock, fields(model); async=true)
+    fill_halo_regions!(tracers, grid, model.clock, fields(model); async=true)
 
-    @apply_regionally compute_auxiliaries!(model)
+    parameters = surface_kernel_parameters(model.grid)
+    update_vertical_velocities!(model.velocities, model.grid, model; parameters)    
+    update_hydrostatic_pressure!(model.pressure.pHY′, arch, grid, model.buoyancy, model.tracers; parameters)
+    compute_diffusivities!(model.diffusivity_fields, model.closure, model; parameters=:xyz)
 
     fill_halo_regions!(model.diffusivity_fields; only_local_halos=true)
 
@@ -49,49 +55,14 @@ function update_state!(model::HydrostaticFreeSurfaceModel, grid, callbacks; comp
 
     update_biogeochemical_state!(model.biogeochemistry, model)
 
-    compute_tendencies &&
-        @apply_regionally compute_tendencies!(model, callbacks)
-
     return nothing
 end
 
 # Mask immersed fields
 function mask_immersed_model_fields!(model, grid)
-    η = displacement(model.free_surface)
-    fields_to_mask = merge(model.auxiliary_fields, prognostic_fields(model))
-
-    foreach(fields_to_mask) do field
-        if field !== η
-            mask_immersed_field!(field)
-        end
-    end
-    mask_immersed_field_xy!(η, k=size(grid, 3)+1)
-
+    mask_immersed_velocities!(model.velocities)
+    foreach(mask_immersed_field!, model.tracers)
     return nothing
 end
 
-function compute_auxiliaries!(model::HydrostaticFreeSurfaceModel; w_parameters = w_kernel_parameters(model.grid),
-                                                                  p_parameters = p_kernel_parameters(model.grid),
-                                                                  κ_parameters = :xyz)
-
-    grid        = model.grid
-    closure     = model.closure
-    tracers     = model.tracers
-    diffusivity = model.diffusivity_fields
-    buoyancy    = model.buoyancy
-
-    P    = model.pressure.pHY′
-    arch = architecture(grid)
-
-    # Update the vertical velocity to comply with the barotropic correction step
-    update_grid_vertical_velocity!(model, grid, model.vertical_coordinate)
-
-    # Advance diagnostic quantities
-    compute_w_from_continuity!(model; parameters = w_parameters)
-    update_hydrostatic_pressure!(P, arch, grid, buoyancy, tracers; parameters = p_parameters)
-
-    # Update closure diffusivities
-    compute_diffusivities!(diffusivity, closure, model; parameters = κ_parameters)
-
-    return nothing
-end
+mask_immersed_velocities!(velocities) = foreach(mask_immersed_field!, velocities)
diff --git a/src/Models/HydrostaticFreeSurfaceModels/z_star_vertical_spacing.jl b/src/Models/HydrostaticFreeSurfaceModels/z_star_vertical_spacing.jl
index 7bc8e8f30b..d190cdb268 100644
--- a/src/Models/HydrostaticFreeSurfaceModels/z_star_vertical_spacing.jl
+++ b/src/Models/HydrostaticFreeSurfaceModels/z_star_vertical_spacing.jl
@@ -7,122 +7,49 @@ using Oceananigans.ImmersedBoundaries: MutableGridOfSomeKind
 #####
 
 # The easy case
-barotropic_velocities(free_surface::SplitExplicitFreeSurface) = free_surface.barotropic_velocities
+barotropic_transport(free_surface::SplitExplicitFreeSurface) = 
+    (U = free_surface.filtered_state.Ũ, 
+     V = free_surface.filtered_state.Ṽ)
+
+# The easy case
+barotropic_velocities(free_surface::SplitExplicitFreeSurface) = 
+    free_surface.barotropic_velocities
 
 # The "harder" case, barotropic velocities are computed on the fly
 barotropic_velocities(free_surface) = nothing, nothing
+barotropic_transport(free_surface)  = nothing, nothing
 
-# Fallback
-ab2_step_grid!(grid, model, ztype, Δt, χ) = nothing
-
-function zstar_params(grid::AbstractGrid)
-
-    Nx, Ny, _ = size(grid)
-    Hx, Hy, _ = halo_size(grid)
-    Tx, Ty, _ = topology(grid)
-
-    xrange = params_range(Hx, Nx, Tx)
-    yrange = params_range(Hy, Ny, Ty)
-
-    return KernelParameters(xrange, yrange)
-end
-
-params_range(H, N, ::Type{Flat}) = 1:1
-params_range(H, N, T) = -H+2:N+H-1
-
-function ab2_step_grid!(grid::MutableGridOfSomeKind, model, ztype::ZStarCoordinate, Δt, χ)
-
-    # Scalings and free surface
-    σᶜᶜ⁻ = grid.z.σᶜᶜ⁻
-    σᶜᶜⁿ = grid.z.σᶜᶜⁿ
-    σᶠᶜⁿ = grid.z.σᶠᶜⁿ
-    σᶜᶠⁿ = grid.z.σᶜᶠⁿ
-    σᶠᶠⁿ = grid.z.σᶠᶠⁿ
-    ηⁿ   = grid.z.ηⁿ
-    Gⁿ   = ztype.storage
-
-    U, V = barotropic_velocities(model.free_surface)
-    u, v, _ = model.velocities
-
-    params = zstar_params(grid)
-
-    launch!(architecture(grid), grid, params, _ab2_update_grid_scaling!,
-            σᶜᶜⁿ, σᶠᶜⁿ, σᶜᶠⁿ, σᶠᶠⁿ, σᶜᶜ⁻, ηⁿ, Gⁿ, grid, Δt, χ, U, V, u, v)
-
+function ab2_step_grid!(grid::MutableGridOfSomeKind, model, ztype::ZStarCoordinate, Δt, χ) 
+    launch!(architecture(grid), grid, surface_kernel_parameters(grid), _update_zstar_scaling!, model.free_surface.η, grid)
+    parent(grid.z.σᶜᶜ⁻) .= parent(grid.z.σᶜᶜⁿ)
     return nothing
 end
 
-# Update η in the grid
-# Note!!! This η is different than the free surface coming from the barotropic step!!
-# This η is the one used to compute the vertical spacing.
-# TODO: The two different free surfaces need to be reconciled.
-@kernel function _ab2_update_grid_scaling!(σᶜᶜⁿ, σᶠᶜⁿ, σᶜᶠⁿ, σᶠᶠⁿ, σᶜᶜ⁻, ηⁿ, Gⁿ, grid, Δt, χ, U, V, u, v)
-    i, j = @index(Global, NTuple)
-    kᴺ = size(grid, 3)
-
-    C₁ = 3 * one(χ) / 2 + χ
-    C₂ =     one(χ) / 2 + χ
-
-    δx_U = δxᶜᶜᶜ(i, j, kᴺ, grid, Δy_qᶠᶜᶜ, barotropic_U, U, u)
-    δy_V = δyᶜᶜᶜ(i, j, kᴺ, grid, Δx_qᶜᶠᶜ, barotropic_V, V, v)
-    δh_U = (δx_U + δy_V) * Az⁻¹ᶜᶜᶜ(i, j, kᴺ, grid)
-
-    @inbounds ηⁿ[i, j, 1] -= Δt * (C₁ * δh_U - C₂ * Gⁿ[i, j, 1])
-    @inbounds Gⁿ[i, j, 1] = δh_U
-
-    update_grid_scaling!(σᶜᶜⁿ, σᶠᶜⁿ, σᶜᶠⁿ, σᶠᶠⁿ, σᶜᶜ⁻, i, j, grid, ηⁿ)
-end
-
-rk3_substep_grid!(grid, model, vertical_coordinate, Δt) = nothing
-
-function rk3_substep_grid!(grid::MutableGridOfSomeKind, model, ztype::ZStarCoordinate, Δt)
-
-    # Scalings and free surface
-    σᶜᶜ⁻ = grid.z.σᶜᶜ⁻
-    σᶜᶜⁿ = grid.z.σᶜᶜⁿ
-    σᶠᶜⁿ = grid.z.σᶠᶜⁿ
-    σᶜᶠⁿ = grid.z.σᶜᶠⁿ
-    σᶠᶠⁿ = grid.z.σᶠᶠⁿ
-    ηⁿ   = grid.z.ηⁿ
-    ηⁿ⁻¹ = ztype.storage
-
-    U, V = barotropic_velocities(model.free_surface)
-    u, v, _ = model.velocities
-    params = zstar_params(grid)
-
-    launch!(architecture(grid), grid, params, _rk3_update_grid_scaling!,
-            σᶜᶜⁿ, σᶠᶜⁿ, σᶜᶠⁿ, σᶠᶠⁿ, σᶜᶜ⁻, ηⁿ, ηⁿ⁻¹, grid, Δt, U, V, u, v)
-
+function rk_substep_grid!(grid::MutableGridOfSomeKind, model, ztype::ZStarCoordinate, Δt)
+    launch!(architecture(grid), grid, surface_kernel_parameters(grid), _update_zstar_scaling!, model.free_surface.η, grid)
+    if model.clock.stage == length(model.timestepper.β)
+       parent(grid.z.σᶜᶜ⁻) .= parent(grid.z.σᶜᶜⁿ)
+    end
     return nothing
 end
 
 # Update η in the grid
-# Note!!! This η is different than the free surface coming from the barotropic step!!
-# This η is the one used to compute the vertical spacing.
-# TODO: The two different free surfaces need to be reconciled.
-@kernel function _rk3_update_grid_scaling!(σᶜᶜⁿ, σᶠᶜⁿ, σᶜᶠⁿ, σᶠᶠⁿ, σᶜᶜ⁻, ηⁿ, ηⁿ⁻¹, grid, Δt, U, V, u, v)
-    i, j = @index(Global, NTuple)
-    kᴺ = size(grid, 3)
-
-    δx_U = δxᶜᶜᶜ(i, j, kᴺ, grid, Δy_qᶠᶜᶜ, barotropic_U, U, u)
-    δy_V = δyᶜᶜᶜ(i, j, kᴺ, grid, Δx_qᶜᶠᶜ, barotropic_V, V, v)
-    δh_U = (δx_U + δy_V) * Az⁻¹ᶜᶜᶜ(i, j, kᴺ, grid)
-
-    @inbounds ηⁿ[i, j, 1] = ηⁿ⁻¹[i, j, 1] - Δt * δh_U
-
-    update_grid_scaling!(σᶜᶜⁿ, σᶠᶜⁿ, σᶜᶠⁿ, σᶠᶠⁿ, σᶜᶜ⁻, i, j, grid, ηⁿ)
+@kernel function _update_zstar_scaling!(ηⁿ⁺¹, grid)
+    i, j = @index(Global, NTuple) 
+    @inbounds grid.z.ηⁿ[i, j, 1] = ηⁿ⁺¹[i, j, grid.Nz+1]
+    update_grid_scaling!(grid.z, i, j, grid)
 end
 
-@inline function update_grid_scaling!(σᶜᶜⁿ, σᶠᶜⁿ, σᶜᶠⁿ, σᶠᶠⁿ, σᶜᶜ⁻, i, j, grid, ηⁿ)
+@inline function update_grid_scaling!(z, i, j, grid)
     hᶜᶜ = static_column_depthᶜᶜᵃ(i, j, grid)
     hᶠᶜ = static_column_depthᶠᶜᵃ(i, j, grid)
     hᶜᶠ = static_column_depthᶜᶠᵃ(i, j, grid)
     hᶠᶠ = static_column_depthᶠᶠᵃ(i, j, grid)
 
-    Hᶜᶜ = column_depthᶜᶜᵃ(i, j, 1, grid, ηⁿ)
-    Hᶠᶜ = column_depthᶠᶜᵃ(i, j, 1, grid, ηⁿ)
-    Hᶜᶠ = column_depthᶜᶠᵃ(i, j, 1, grid, ηⁿ)
-    Hᶠᶠ = column_depthᶠᶠᵃ(i, j, 1, grid, ηⁿ)
+    Hᶜᶜ = column_depthᶜᶜᵃ(i, j, 1, grid, z.ηⁿ)
+    Hᶠᶜ = column_depthᶠᶜᵃ(i, j, 1, grid, z.ηⁿ)
+    Hᶜᶠ = column_depthᶜᶠᵃ(i, j, 1, grid, z.ηⁿ)
+    Hᶠᶠ = column_depthᶠᶠᵃ(i, j, 1, grid, z.ηⁿ)
 
     σᶜᶜ = ifelse(hᶜᶜ == 0, one(grid), Hᶜᶜ / hᶜᶜ)
     σᶠᶜ = ifelse(hᶠᶜ == 0, one(grid), Hᶠᶜ / hᶠᶜ)
@@ -130,32 +57,32 @@ end
     σᶠᶠ = ifelse(hᶠᶠ == 0, one(grid), Hᶠᶠ / hᶠᶠ)
 
     @inbounds begin
-        # Update previous scaling
-        σᶜᶜ⁻[i, j, 1] = σᶜᶜⁿ[i, j, 1]
-
         # update current scaling
-        σᶜᶜⁿ[i, j, 1] = σᶜᶜ
-        σᶠᶜⁿ[i, j, 1] = σᶠᶜ
-        σᶜᶠⁿ[i, j, 1] = σᶜᶠ
-        σᶠᶠⁿ[i, j, 1] = σᶠᶠ
+        z.σᶜᶜⁿ[i, j, 1] = σᶜᶜ
+        z.σᶠᶜⁿ[i, j, 1] = σᶠᶜ
+        z.σᶜᶠⁿ[i, j, 1] = σᶜᶠ
+        z.σᶠᶠⁿ[i, j, 1] = σᶠᶠ
     end
 end
 
-update_grid_vertical_velocity!(model, grid, ztype) = nothing
-
-function update_grid_vertical_velocity!(model, grid::MutableGridOfSomeKind, ::ZStarCoordinate)
+function update_grid_vertical_velocity!(velocities, model, grid::MutableGridOfSomeKind, ::ZStarCoordinate; parameters=surface_kernel_parameters(grid))
 
     # the barotropic velocities are retrieved from the free surface model for a
     # SplitExplicitFreeSurface and are calculated for other free surface models
-    U, V = barotropic_velocities(model.free_surface)
-    u, v, _ = model.velocities
-    ∂t_σ  = grid.z.∂t_σ
-
-    params = zstar_params(grid)
+    # Here we distringuish between the model (prognostic) velocities and the transport velocities
+    # used to advect tracers...
+    if velocities === model.velocities
+        U, V = barotropic_velocities(model.free_surface)
+    else
+        U, V = barotropic_transport(model.free_surface)
+    end
+    
+    u, v, _ = velocities
+    ∂t_σ    = grid.z.∂t_σ
 
     # Update the time derivative of the vertical spacing,
     # No need to fill the halo as the scaling is updated _IN_ the halos
-    launch!(architecture(grid), grid, params, _update_grid_vertical_velocity!, ∂t_σ, grid, U, V, u, v)
+    launch!(architecture(grid), grid, parameters, _update_grid_vertical_velocity!, ∂t_σ, grid, U, V, u, v)
 
     return nothing
 end
@@ -170,32 +97,11 @@ end
     δx_U = δxᶜᶜᶜ(i, j, kᴺ, grid, Δy_qᶠᶜᶜ, barotropic_U, U, u)
     δy_V = δyᶜᶜᶜ(i, j, kᴺ, grid, Δx_qᶜᶠᶜ, barotropic_V, V, v)
 
-    δh_U = (δx_U + δy_V) * Az⁻¹ᶜᶜᶜ(i, j, kᴺ, grid)
+    δh_U = (δx_U + δy_V) * Az⁻¹ᶜᶜᶜ(i, j, kᴺ, grid) 
 
     @inbounds ∂t_σ[i, j, 1] = ifelse(hᶜᶜ == 0, zero(grid), - δh_U / hᶜᶜ)
 end
 
-# If U and V exist, we use them
-@inline barotropic_U(i, j, k, grid, U, u) = @inbounds U[i, j, k]
-@inline barotropic_V(i, j, k, grid, V, v) = @inbounds V[i, j, k]
-
-# If either U or V are not available, we compute them
-@inline function barotropic_U(i, j, k, grid, ::Nothing, u)
-    U = 0
-    for k in 1:size(grid, 3)
-        @inbounds U += u[i, j, k] * Δzᶠᶜᶜ(i, j, k, grid)
-    end
-    return U
-end
-
-@inline function barotropic_V(i, j, k, grid, ::Nothing, v)
-    V = 0
-    for k in 1:size(grid, 3)
-        @inbounds V += v[i, j, k] * Δzᶜᶠᶜ(i, j, k, grid)
-    end
-    return V
-end
-
 #####
 ##### Multiply by grid scaling
 #####
diff --git a/src/Models/NonhydrostaticModels/NonhydrostaticModels.jl b/src/Models/NonhydrostaticModels/NonhydrostaticModels.jl
index a1c0a8955c..f8979c3fb2 100644
--- a/src/Models/NonhydrostaticModels/NonhydrostaticModels.jl
+++ b/src/Models/NonhydrostaticModels/NonhydrostaticModels.jl
@@ -101,6 +101,9 @@ prognostic_fields(model::NonhydrostaticModel) = merge(model.velocities, model.tr
 # Unpack model.particles to update particle properties. See Models/LagrangianParticleTracking/LagrangianParticleTracking.jl
 step_lagrangian_particles!(model::NonhydrostaticModel, Δt) = step_lagrangian_particles!(model.particles, model, Δt)
 
+include("cache_nonhydrostatic_tendencies.jl")
+include("nonhydrostatic_ab2_step.jl")
+include("nonhydrostatic_rk3_substep.jl")
 include("solve_for_pressure.jl")
 include("update_hydrostatic_pressure.jl")
 include("update_nonhydrostatic_model_state.jl")
diff --git a/src/Models/NonhydrostaticModels/cache_nonhydrostatic_tendencies.jl b/src/Models/NonhydrostaticModels/cache_nonhydrostatic_tendencies.jl
new file mode 100644
index 0000000000..9b787a0fed
--- /dev/null
+++ b/src/Models/NonhydrostaticModels/cache_nonhydrostatic_tendencies.jl
@@ -0,0 +1,24 @@
+using Oceananigans: prognostic_fields
+using Oceananigans.Grids: AbstractGrid
+using Oceananigans.Utils: launch!
+
+import Oceananigans.TimeSteppers: cache_previous_tendencies!
+
+""" Store source terms for `u`, `v`, and `w`. """
+@kernel function _cache_field_tendencies!(G⁻, G⁰)
+    i, j, k = @index(Global, NTuple)
+    @inbounds G⁻[i, j, k] = G⁰[i, j, k]
+end
+
+""" Store previous source terms before updating them. """
+function cache_previous_tendencies!(model::NonhydrostaticModel)
+    model_fields = prognostic_fields(model)
+
+    for field_name in keys(model_fields)
+        launch!(model.architecture, model.grid, :xyz, _cache_field_tendencies!,
+                model.timestepper.G⁻[field_name],
+                model.timestepper.Gⁿ[field_name])
+    end
+
+    return nothing
+end
diff --git a/src/Models/NonhydrostaticModels/compute_nonhydrostatic_tendencies.jl b/src/Models/NonhydrostaticModels/compute_nonhydrostatic_tendencies.jl
index fcbd87a7c0..c1f5ef5b3f 100644
--- a/src/Models/NonhydrostaticModels/compute_nonhydrostatic_tendencies.jl
+++ b/src/Models/NonhydrostaticModels/compute_nonhydrostatic_tendencies.jl
@@ -35,6 +35,7 @@ function compute_tendencies!(model::NonhydrostaticModel, callbacks)
 
     compute_interior_tendency_contributions!(model, kernel_parameters; active_cells_map)
     complete_communication_and_compute_buffer!(model, grid, arch)
+    compute_flux_bc_tendencies!(model)
 
     for callback in callbacks
         callback.callsite isa TendencyCallsite && callback(model)
diff --git a/src/Models/NonhydrostaticModels/nonhydrostatic_ab2_step.jl b/src/Models/NonhydrostaticModels/nonhydrostatic_ab2_step.jl
new file mode 100644
index 0000000000..07e06303e7
--- /dev/null
+++ b/src/Models/NonhydrostaticModels/nonhydrostatic_ab2_step.jl
@@ -0,0 +1,47 @@
+using Oceananigans.TimeSteppers: ab2_step_field!, implicit_step!
+import Oceananigans.TimeSteppers: ab2_step!
+
+# AB2 step for NonhydrostaticModel. This is a predictor-corrector scheme where the
+# predictor step for velocities is an AB2 step. The velocities are then corrected
+# using the pressure correction obtained by solving a Poisson equation for the pressure. 
+function ab2_step!(model::NonhydrostaticModel, Δt, callbacks)
+    grid = model.grid
+
+    compute_tendencies!(model, callbacks)
+
+    # Velocity steps
+    for (i, field) in enumerate(model.velocities)
+        kernel_args = (field, Δt, model.timestepper.χ, model.timestepper.Gⁿ[i], model.timestepper.G⁻[i])
+        launch!(architecture(grid), grid, :xyz, ab2_step_field!, kernel_args...; exclude_periphery=true)
+
+        implicit_step!(field,
+                       model.timestepper.implicit_solver,
+                       model.closure,
+                       model.diffusivity_fields,
+                       nothing,
+                       model.clock,
+                       fields(model),
+                       Δt)
+    end
+
+    # Tracer steps
+    for (i, name) in enumerate(propertynames(model.tracers))
+        field = model.tracers[name]
+        kernel_args = (field, Δt, model.timestepper.χ, model.timestepper.Gⁿ[name], model.timestepper.G⁻[name])
+        launch!(architecture(grid), grid, :xyz, ab2_step_field!, kernel_args...)
+
+        implicit_step!(field,
+                       model.timestepper.implicit_solver,
+                       model.closure,
+                       model.diffusivity_fields,
+                       Val(i),
+                       model.clock,
+                       fields(model),
+                       Δt)
+    end
+
+    compute_pressure_correction!(model, Δt)
+    make_pressure_correction!(model, Δt)
+
+    return nothing
+end
diff --git a/src/Models/NonhydrostaticModels/nonhydrostatic_model.jl b/src/Models/NonhydrostaticModels/nonhydrostatic_model.jl
index c89fb9fdae..440092c4db 100644
--- a/src/Models/NonhydrostaticModels/nonhydrostatic_model.jl
+++ b/src/Models/NonhydrostaticModels/nonhydrostatic_model.jl
@@ -113,25 +113,25 @@ Keyword arguments
   - `auxiliary_fields`: `NamedTuple` of auxiliary fields. Default: `nothing`
 """
 function NonhydrostaticModel(; grid,
-                             clock = Clock(grid),
-                             advection = Centered(),
-                             buoyancy = nothing,
-                             coriolis = nothing,
-                             stokes_drift = nothing,
-                             forcing::NamedTuple = NamedTuple(),
-                             closure = nothing,
-                             boundary_conditions::NamedTuple = NamedTuple(),
-                             tracers = (),
-                             timestepper = :RungeKutta3,
-                             background_fields::BFOrNamedTuple = NamedTuple(),
-                             particles::ParticlesOrNothing = nothing,
-                             biogeochemistry::AbstractBGCOrNothing = nothing,
-                             velocities = nothing,
-                             hydrostatic_pressure_anomaly = DefaultHydrostaticPressureAnomaly(),
-                             nonhydrostatic_pressure = CenterField(grid),
-                             diffusivity_fields = nothing,
-                             pressure_solver = nothing,
-                             auxiliary_fields = NamedTuple())
+                               clock = Clock(grid),
+                               advection = Centered(),
+                               buoyancy = nothing,
+                               coriolis = nothing,
+                               stokes_drift = nothing,
+                               forcing::NamedTuple = NamedTuple(),
+                               closure = nothing,
+                               boundary_conditions::NamedTuple = NamedTuple(),
+                               tracers = (),
+                               timestepper = :RungeKutta3,
+                               background_fields::BFOrNamedTuple = NamedTuple(),
+                               particles::ParticlesOrNothing = nothing,
+                               biogeochemistry::AbstractBGCOrNothing = nothing,
+                               velocities = nothing,
+                               hydrostatic_pressure_anomaly = DefaultHydrostaticPressureAnomaly(),
+                               nonhydrostatic_pressure = CenterField(grid),
+                               diffusivity_fields = nothing,
+                               pressure_solver = nothing,
+                               auxiliary_fields = NamedTuple())
 
     arch = architecture(grid)
 
@@ -238,7 +238,7 @@ function NonhydrostaticModel(; grid,
                                 forcing, closure, background_fields, particles, biogeochemistry, velocities, tracers,
                                 pressures, diffusivity_fields, timestepper, pressure_solver, auxiliary_fields, boundary_mass_fluxes)
 
-    update_state!(model; compute_tendencies = false)
+    update_state!(model)
 
     return model
 end
diff --git a/src/Models/NonhydrostaticModels/nonhydrostatic_rk3_substep.jl b/src/Models/NonhydrostaticModels/nonhydrostatic_rk3_substep.jl
new file mode 100644
index 0000000000..0eb1b27cdc
--- /dev/null
+++ b/src/Models/NonhydrostaticModels/nonhydrostatic_rk3_substep.jl
@@ -0,0 +1,45 @@
+using Oceananigans.TimeSteppers: rk3_substep_field!, stage_Δt
+import Oceananigans.TimeSteppers: rk3_substep!
+
+function rk3_substep!(model::NonhydrostaticModel, Δt, γⁿ, ζⁿ, callbacks)
+    Δτ   = stage_Δt(Δt, γⁿ, ζⁿ) 
+    grid = model.grid
+
+    compute_tendencies!(model, callbacks)
+
+    # Velocity steps
+    for (i, field) in enumerate(model.velocities)
+        kernel_args = (field, Δt, γⁿ, ζⁿ, model.timestepper.Gⁿ[i], model.timestepper.G⁻[i])
+        launch!(architecture(grid), grid, :xyz, rk3_substep_field!, kernel_args...; exclude_periphery=true)
+
+        implicit_step!(field,
+                       model.timestepper.implicit_solver,
+                       model.closure,
+                       model.diffusivity_fields,
+                       nothing,
+                       model.clock,
+                       fields(model),
+                       Δτ)
+    end
+
+    # Tracer steps
+    for (i, name) in enumerate(propertynames(model.tracers))
+        field = model.tracers[name]
+        kernel_args = (field, Δt, γⁿ, ζⁿ, model.timestepper.Gⁿ[name], model.timestepper.G⁻[name])
+        launch!(architecture(grid), grid, :xyz, rk3_substep_field!, kernel_args...)
+
+        implicit_step!(field,
+                       model.timestepper.implicit_solver,
+                       model.closure,
+                       model.diffusivity_fields,
+                       Val(i),
+                       model.clock,
+                       fields(model),
+                       Δτ)
+    end
+
+    compute_pressure_correction!(model, Δτ)
+    make_pressure_correction!(model, Δτ)
+
+    return nothing
+end
diff --git a/src/Models/NonhydrostaticModels/pressure_correction.jl b/src/Models/NonhydrostaticModels/pressure_correction.jl
index a75c9c666e..cbfe638d78 100644
--- a/src/Models/NonhydrostaticModels/pressure_correction.jl
+++ b/src/Models/NonhydrostaticModels/pressure_correction.jl
@@ -1,5 +1,3 @@
-import Oceananigans.TimeSteppers: compute_pressure_correction!, make_pressure_correction!
-
 """
     compute_pressure_correction!(model::NonhydrostaticModel, Δt)
 
diff --git a/src/Models/NonhydrostaticModels/set_nonhydrostatic_model.jl b/src/Models/NonhydrostaticModels/set_nonhydrostatic_model.jl
index e540049f76..89139d9b68 100644
--- a/src/Models/NonhydrostaticModels/set_nonhydrostatic_model.jl
+++ b/src/Models/NonhydrostaticModels/set_nonhydrostatic_model.jl
@@ -1,5 +1,5 @@
 using Oceananigans.BoundaryConditions: fill_halo_regions!
-using Oceananigans.TimeSteppers: update_state!, compute_pressure_correction!, make_pressure_correction!
+using Oceananigans.TimeSteppers: update_state!
 
 import Oceananigans.Fields: set!
 
@@ -47,13 +47,13 @@ function set!(model::NonhydrostaticModel; enforce_incompressibility=true, kwargs
     # Apply a mask
     foreach(mask_immersed_field!, model.tracers)
     foreach(mask_immersed_field!, model.velocities)
-    update_state!(model; compute_tendencies = false)
+    update_state!(model)
 
     if enforce_incompressibility
         FT = eltype(model.grid)
         compute_pressure_correction!(model, one(FT))
         make_pressure_correction!(model, one(FT))
-        update_state!(model; compute_tendencies = false)
+        update_state!(model)
     end
 
     return nothing
diff --git a/src/Models/NonhydrostaticModels/update_hydrostatic_pressure.jl b/src/Models/NonhydrostaticModels/update_hydrostatic_pressure.jl
index a5733295a2..acbf0f8eb5 100644
--- a/src/Models/NonhydrostaticModels/update_hydrostatic_pressure.jl
+++ b/src/Models/NonhydrostaticModels/update_hydrostatic_pressure.jl
@@ -28,23 +28,11 @@ update_hydrostatic_pressure!(grid, model; kwargs...) =
 const PCB = PartialCellBottom
 const PCBIBG = ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:Any, <:PCB}
 
-update_hydrostatic_pressure!(pHY′, arch, ibg::PCBIBG, buoyancy, tracers; parameters = p_kernel_parameters(ibg.underlying_grid)) =
+update_hydrostatic_pressure!(pHY′, arch, ibg::PCBIBG, buoyancy, tracers; parameters = surface_kernel_parameters(ibg.underlying_grid)) =
     update_hydrostatic_pressure!(pHY′, arch, ibg.underlying_grid, buoyancy, tracers; parameters)
 
-update_hydrostatic_pressure!(pHY′, arch, grid, buoyancy, tracers; parameters = p_kernel_parameters(grid)) =
+update_hydrostatic_pressure!(pHY′, arch, grid, buoyancy, tracers; parameters = surface_kernel_parameters(grid)) =
     launch!(arch, grid, parameters, _update_hydrostatic_pressure!, pHY′, grid, buoyancy, tracers)
 
 update_hydrostatic_pressure!(::Nothing, arch, grid, args...; kw...) = nothing
-update_hydrostatic_pressure!(::Nothing, arch, ::PCBIBG, args...; kw...) = nothing
-
-# extend p kernel to compute also the boundaries
-@inline function p_kernel_parameters(grid)
-    Nx, Ny, _ = size(grid)
-    TX, TY, _ = topology(grid)
-
-    ii = ifelse(TX == Flat, 1:Nx, 0:Nx+1)
-    jj = ifelse(TY == Flat, 1:Ny, 0:Ny+1)
-
-    return KernelParameters(ii, jj)
-end
-
+update_hydrostatic_pressure!(::Nothing, arch, ::PCBIBG, args...; kw...) = nothing
\ No newline at end of file
diff --git a/src/Models/NonhydrostaticModels/update_nonhydrostatic_model_state.jl b/src/Models/NonhydrostaticModels/update_nonhydrostatic_model_state.jl
index ac16bcad60..6918d5e7b6 100644
--- a/src/Models/NonhydrostaticModels/update_nonhydrostatic_model_state.jl
+++ b/src/Models/NonhydrostaticModels/update_nonhydrostatic_model_state.jl
@@ -6,7 +6,7 @@ using Oceananigans.BoundaryConditions: update_boundary_conditions!
 using Oceananigans.TurbulenceClosures: compute_diffusivities!
 using Oceananigans.Fields: compute!
 using Oceananigans.ImmersedBoundaries: mask_immersed_field!
-using Oceananigans.Models: update_model_field_time_series!
+using Oceananigans.Models: update_model_field_time_series!, surface_kernel_parameters
 
 import Oceananigans.TimeSteppers: update_state!
 
@@ -17,7 +17,7 @@ Update peripheral aspects of the model (halo regions, diffusivities, hydrostatic
 pressure) to the current model state. If `callbacks` are provided (in an array),
 they are called in the end.
 """
-function update_state!(model::NonhydrostaticModel, callbacks=[]; compute_tendencies = true)
+function update_state!(model::NonhydrostaticModel, callbacks=[])
 
     # Mask immersed tracers
     foreach(model.tracers) do tracer
@@ -49,13 +49,10 @@ function update_state!(model::NonhydrostaticModel, callbacks=[]; compute_tendenc
 
     update_biogeochemical_state!(model.biogeochemistry, model)
 
-    compute_tendencies &&
-        @apply_regionally compute_tendencies!(model, callbacks)
-
     return nothing
 end
 
-function compute_auxiliaries!(model::NonhydrostaticModel; p_parameters = tuple(p_kernel_parameters(model.grid)),
+function compute_auxiliaries!(model::NonhydrostaticModel; p_parameters = tuple(surface_kernel_parameters(model.grid)),
                                                           κ_parameters = tuple(:xyz))
 
     closure = model.closure
@@ -65,5 +62,6 @@ function compute_auxiliaries!(model::NonhydrostaticModel; p_parameters = tuple(p
         compute_diffusivities!(diffusivity, closure, model; parameters = κpar)
         update_hydrostatic_pressure!(model; parameters = ppar)
     end
+    
     return nothing
 end
diff --git a/src/Models/ShallowWaterModels/ShallowWaterModels.jl b/src/Models/ShallowWaterModels/ShallowWaterModels.jl
index 90fd05f5da..21293d232e 100644
--- a/src/Models/ShallowWaterModels/ShallowWaterModels.jl
+++ b/src/Models/ShallowWaterModels/ShallowWaterModels.jl
@@ -39,7 +39,9 @@ prognostic_fields(model::ShallowWaterModel) = fields(model)
 
 include("solution_and_tracer_tendencies.jl")
 include("compute_shallow_water_tendencies.jl")
+include("shallow_water_rk3_substep.jl")
 include("update_shallow_water_state.jl")
+include("cache_shallow_water_tendencies.jl")
 include("shallow_water_advection_operators.jl")
 include("shallow_water_diffusion_operators.jl")
 include("shallow_water_cell_advection_timescale.jl")
diff --git a/src/Models/ShallowWaterModels/store_shallow_water_tendencies.jl b/src/Models/ShallowWaterModels/cache_shallow_water_tendencies.jl
similarity index 52%
rename from src/Models/ShallowWaterModels/store_shallow_water_tendencies.jl
rename to src/Models/ShallowWaterModels/cache_shallow_water_tendencies.jl
index 6add5b4563..dda1c27b0e 100644
--- a/src/Models/ShallowWaterModels/store_shallow_water_tendencies.jl
+++ b/src/Models/ShallowWaterModels/cache_shallow_water_tendencies.jl
@@ -1,10 +1,8 @@
 using Oceananigans.Utils: work_layout
 using Oceananigans.Architectures: device
-using Oceananigans.TimeSteppers: cache_tracer_tendency!
-
 using KernelAbstractions.Extras.LoopInfo: @unroll
 
-import Oceananigans.TimeSteppers: _cache_field_tendencies!
+import Oceananigans.TimeSteppers: cache_previous_tendencies!
 
 """ Store source terms for `uh`, `vh`, and `h`. """
 @kernel function _cache_solution_tendencies!(G⁻, grid, G⁰)
@@ -14,22 +12,24 @@ import Oceananigans.TimeSteppers: _cache_field_tendencies!
     end
 end
 
+""" Store source terms for `u`, `v`, and `w`. """
+@kernel function _cache_field_tendencies!(G⁻, grid, G⁰)
+    i, j, k = @index(Global, NTuple)
+    @inbounds G⁻[i, j, k] = G⁰[i, j, k]
+end
+
 """ Store previous source terms before updating them. """
 function cache_previous_tendencies!(model::ShallowWaterModel)
-    workgroup, worksize = work_layout(model.grid, :xyz)
-
-    cache_solution_tendencies_kernel! = _cache_solution_tendencies!(device(model.architecture), workgroup, worksize)
-    cache_tracer_tendency_kernel! = _cache_field_tendencies!(device(model.architecture), workgroup, worksize)
+    _cache_solution!, _ = configure_kernel(model.architecture, model.grid, :xyz, _cache_solution_tendencies!)
+    _cache_tracer!, _ = configure_kernel(model.architecture, model.grid, :xyz, _cache_field_tendencies!)
 
-    cache_solution_tendencies_kernel!(model.timestepper.G⁻,
-                                      model.grid,
-                                      model.timestepper.Gⁿ)
+    _cache_solution!(model.timestepper.G⁻, model.grid, model.timestepper.Gⁿ)
 
     # Tracer fields
     for i in 4:length(model.timestepper.G⁻)
         @inbounds Gc⁻ = model.timestepper.G⁻[i]
         @inbounds Gc⁰ = model.timestepper.Gⁿ[i]
-        cache_tracer_tendency_kernel!(Gc⁻, model.grid, Gc⁰)
+        _cache_tracer!(Gc⁻, model.grid, Gc⁰)
     end
 
     return nothing
diff --git a/src/Models/ShallowWaterModels/compute_shallow_water_tendencies.jl b/src/Models/ShallowWaterModels/compute_shallow_water_tendencies.jl
index 268930bd62..a8655248bc 100644
--- a/src/Models/ShallowWaterModels/compute_shallow_water_tendencies.jl
+++ b/src/Models/ShallowWaterModels/compute_shallow_water_tendencies.jl
@@ -44,6 +44,8 @@ function compute_tendencies!(model::ShallowWaterModel, callbacks)
                                              model.clock,
                                              model.formulation)
 
+    compute_flux_bc_tendencies!(model)
+
     [callback(model) for callback in callbacks if isa(callback.callsite, TendencyCallsite)]
 
     return nothing
diff --git a/src/Models/ShallowWaterModels/set_shallow_water_model.jl b/src/Models/ShallowWaterModels/set_shallow_water_model.jl
index 9860931f62..9017a5e6d9 100644
--- a/src/Models/ShallowWaterModels/set_shallow_water_model.jl
+++ b/src/Models/ShallowWaterModels/set_shallow_water_model.jl
@@ -14,7 +14,7 @@ function set!(model::ShallowWaterModel; kwargs...)
         set!(ϕ, value)
     end
 
-    update_state!(model; compute_tendencies = false)
+    update_state!(model)
 
     return nothing
 end
diff --git a/src/Models/ShallowWaterModels/shallow_water_model.jl b/src/Models/ShallowWaterModels/shallow_water_model.jl
index ebbbb1c1b3..2bee31c07d 100644
--- a/src/Models/ShallowWaterModels/shallow_water_model.jl
+++ b/src/Models/ShallowWaterModels/shallow_water_model.jl
@@ -205,7 +205,7 @@ function ShallowWaterModel(;
                               timestepper,
                               formulation)
 
-    update_state!(model; compute_tendencies = false)
+    update_state!(model)
 
     return model
 end
diff --git a/src/Models/ShallowWaterModels/rk3_substep_shallow_water_model.jl b/src/Models/ShallowWaterModels/shallow_water_rk3_substep.jl
similarity index 60%
rename from src/Models/ShallowWaterModels/rk3_substep_shallow_water_model.jl
rename to src/Models/ShallowWaterModels/shallow_water_rk3_substep.jl
index 880ae39163..2f891fa803 100644
--- a/src/Models/ShallowWaterModels/rk3_substep_shallow_water_model.jl
+++ b/src/Models/ShallowWaterModels/shallow_water_rk3_substep.jl
@@ -1,39 +1,38 @@
-using Oceananigans.Utils: work_layout
-using Oceananigans.Architectures: device
+using Oceananigans.Architectures: architecture
+using Oceananigans.Utils: configure_kernel
 using Oceananigans.TimeSteppers: rk3_substep_field!
 
 import Oceananigans.TimeSteppers: rk3_substep!
 
-function rk3_substep!(model::ShallowWaterModel, Δt, γⁿ, ζⁿ)
+function rk3_substep!(model::ShallowWaterModel, Δt, γⁿ, ζⁿ, callbacks)
 
-    workgroup, worksize = work_layout(model.grid, :xyz)
+    compute_tendencies!(model, callbacks)
+    grid = model.grid
 
-    substep_solution_kernel! = rk3_substep_solution!(device(model.architecture), workgroup, worksize)
-    substep_tracer_kernel! = rk3_substep_tracer!(device(model.architecture), workgroup, worksize)
 
+    launch!(architecture(grid), grid, :xyz, _rk_substep_solution!, 
+            model.solution,
+            Δt, γⁿ, ζⁿ,
+            model.timestepper.Gⁿ,
+            model.timestepper.G⁻)
 
-    substep_solution_kernel!(model.solution,
-                             Δt, γⁿ, ζⁿ,
-                             model.timestepper.Gⁿ,
-                             model.timestepper.G⁻)
-
+    _tracer_kernel!, _ = configure_kernel(architecture(grid), grid, :xyz, rk3_substep_field!)
 
     for i in 1:length(model.tracers)
         @inbounds c = model.tracers[i]
         @inbounds Gcⁿ = model.timestepper.Gⁿ[i+3]
         @inbounds Gc⁻ = model.timestepper.G⁻[i+3]
 
-        substep_tracer_kernel!(c, Δt, γⁿ, ζⁿ, Gcⁿ, Gc⁻)
+        _tracer_kernel!(c, Δt, γⁿ, ζⁿ, Gcⁿ, Gc⁻)
     end
 
-
     return nothing
 end
 
 """
 Time step solution fields with a 3rd-order Runge-Kutta method.
 """
-@kernel function rk3_substep_solution!(U, Δt, γⁿ, ζⁿ, Gⁿ, G⁻)
+@kernel function _rk_substep_solution!(U, Δt, γⁿ, ζⁿ, Gⁿ, G⁻)
     i, j, k = @index(Global, NTuple)
 
     @inbounds begin
@@ -46,7 +45,7 @@ end
 """
 Time step solution fields with a 3rd-order Runge-Kutta method.
 """
-@kernel function rk3_substep_solution!(U, Δt, γ¹, ::Nothing, G¹, G⁰)
+@kernel function _rk_substep_solution!(U, Δt, γ¹, ::Nothing, G¹, G⁰)
     i, j, k = @index(Global, NTuple)
 
     @inbounds begin
diff --git a/src/Models/ShallowWaterModels/update_shallow_water_state.jl b/src/Models/ShallowWaterModels/update_shallow_water_state.jl
index 5c2c08fae3..7351c390c7 100644
--- a/src/Models/ShallowWaterModels/update_shallow_water_state.jl
+++ b/src/Models/ShallowWaterModels/update_shallow_water_state.jl
@@ -18,7 +18,7 @@ Next, `callbacks` are executed.
 
 Finally, tendencies are computed if `compute_tendencies=true`.
 """
-function update_state!(model::ShallowWaterModel, callbacks=[]; compute_tendencies=true)
+function update_state!(model::ShallowWaterModel, callbacks=[])
 
     # Mask immersed fields
     foreach(mask_immersed_field!, merge(model.solution, model.tracers))
@@ -38,8 +38,6 @@ function update_state!(model::ShallowWaterModel, callbacks=[]; compute_tendencie
         end
     end
 
-    compute_tendencies && compute_tendencies!(model, callbacks)
-
     return nothing
 end
 
diff --git a/src/Models/VarianceDissipationComputations/VarianceDissipationComputations.jl b/src/Models/VarianceDissipationComputations/VarianceDissipationComputations.jl
index 31ed91d5b1..1516bd448c 100644
--- a/src/Models/VarianceDissipationComputations/VarianceDissipationComputations.jl
+++ b/src/Models/VarianceDissipationComputations/VarianceDissipationComputations.jl
@@ -12,7 +12,7 @@ using Oceananigans.Operators
 using Oceananigans.BoundaryConditions
 using Oceananigans.TimeSteppers: QuasiAdamsBashforth2TimeStepper,
                                  RungeKutta3TimeStepper,
-                                 SplitRungeKutta3TimeStepper
+                                 SplitRungeKuttaTimeStepper
 
 using Oceananigans.TurbulenceClosures: viscosity,
                                        diffusivity,
@@ -33,7 +33,7 @@ using Oceananigans.Operators: volume
 using Oceananigans.Utils: IterationInterval, ConsecutiveIterations
 using KernelAbstractions: @kernel, @index
 
-const RungeKuttaScheme = Union{RungeKutta3TimeStepper, SplitRungeKutta3TimeStepper}
+const RungeKuttaScheme = Union{RungeKutta3TimeStepper, SplitRungeKuttaTimeStepper}
 
 struct VarianceDissipation{P, K, A, D, S}
     advective_production :: P
@@ -80,7 +80,7 @@ Keyword Arguments
 
 !!! compat "Time stepper compatibility"
     At the moment, the variance dissipation diagnostic is supported only for a [`QuasiAdamsBashforth2TimeStepper`](@ref)
-    and a [`SplitRungeKutta3TimeStepper`](@ref).
+    and a [`SplitRungeKuttaTimeStepper`](@ref).
 """
 function VarianceDissipation(tracer_name, grid;
                              Uⁿ⁻¹ = VelocityFields(grid),
diff --git a/src/Models/VarianceDissipationComputations/advective_dissipation.jl b/src/Models/VarianceDissipationComputations/advective_dissipation.jl
index be9ad29d92..e73b17cc5e 100644
--- a/src/Models/VarianceDissipationComputations/advective_dissipation.jl
+++ b/src/Models/VarianceDissipationComputations/advective_dissipation.jl
@@ -63,7 +63,7 @@ end
     end
 end
 
-@kernel function _cache_advective_fluxes!(Fⁿ, Fⁿ⁻¹, grid::AbstractGrid, advection, U, c)
+@kernel function _cache_advective_fluxes!(Fⁿ, Fⁿ⁻¹, grid::AbstractGrid, advection, U, tracer)
     i, j, k = @index(Global, NTuple)
 
     @inbounds begin
@@ -73,19 +73,19 @@ end
         Fⁿ⁻¹.z[i, j, k] = Fⁿ.z[i, j, k]
 
         # Calculate new advective fluxes
-        Fⁿ.x[i, j, k] = _advective_tracer_flux_x(i, j, k, grid, advection, U.u, c) * σⁿ(i, j, k, grid, f, c, c)
-        Fⁿ.y[i, j, k] = _advective_tracer_flux_y(i, j, k, grid, advection, U.v, c) * σⁿ(i, j, k, grid, c, f, c)
-        Fⁿ.z[i, j, k] = _advective_tracer_flux_z(i, j, k, grid, advection, U.w, c) * σⁿ(i, j, k, grid, c, c, f)
+        Fⁿ.x[i, j, k] = _advective_tracer_flux_x(i, j, k, grid, advection, U.u, tracer) * σⁿ(i, j, k, grid, f, c, c)
+        Fⁿ.y[i, j, k] = _advective_tracer_flux_y(i, j, k, grid, advection, U.v, tracer) * σⁿ(i, j, k, grid, c, f, c)
+        Fⁿ.z[i, j, k] = _advective_tracer_flux_z(i, j, k, grid, advection, U.w, tracer) * σⁿ(i, j, k, grid, c, c, f)
     end
 end
 
-@kernel function _cache_advective_fluxes!(Fⁿ, grid::AbstractGrid, advection, U, c)
+@kernel function _cache_advective_fluxes!(Fⁿ, grid::AbstractGrid, advection, U, tracer)
     i, j, k = @index(Global, NTuple)
 
     @inbounds begin
         # Calculate new advective fluxes
-        Fⁿ.x[i, j, k] = _advective_tracer_flux_x(i, j, k, grid, advection, U.u, c) * σⁿ(i, j, k, grid, f, c, c)
-        Fⁿ.y[i, j, k] = _advective_tracer_flux_y(i, j, k, grid, advection, U.v, c) * σⁿ(i, j, k, grid, c, f, c)
-        Fⁿ.z[i, j, k] = _advective_tracer_flux_z(i, j, k, grid, advection, U.w, c) * σⁿ(i, j, k, grid, c, c, f)
+        Fⁿ.x[i, j, k] = _advective_tracer_flux_x(i, j, k, grid, advection, U.u, tracer) * σⁿ(i, j, k, grid, f, c, c)
+        Fⁿ.y[i, j, k] = _advective_tracer_flux_y(i, j, k, grid, advection, U.v, tracer) * σⁿ(i, j, k, grid, c, f, c)
+        Fⁿ.z[i, j, k] = _advective_tracer_flux_z(i, j, k, grid, advection, U.w, tracer) * σⁿ(i, j, k, grid, c, c, f)
     end
 end
\ No newline at end of file
diff --git a/src/Models/VarianceDissipationComputations/compute_dissipation.jl b/src/Models/VarianceDissipationComputations/compute_dissipation.jl
index 3599c24959..68ef66e870 100644
--- a/src/Models/VarianceDissipationComputations/compute_dissipation.jl
+++ b/src/Models/VarianceDissipationComputations/compute_dissipation.jl
@@ -61,8 +61,8 @@ end
 assemble_advective_dissipation!(P, grid, ts::QuasiAdamsBashforth2TimeStepper, substep, Fⁿ, Fⁿ⁻¹, Uⁿ, Uⁿ⁻¹, cⁿ⁺¹, cⁿ) =
     launch!(architecture(grid), grid, :xyz, _assemble_ab2_advective_dissipation!, P, grid, ts.χ, Fⁿ, Fⁿ⁻¹, Uⁿ, Uⁿ⁻¹, cⁿ⁺¹, cⁿ)
 
-function assemble_advective_dissipation!(P, grid, ::RungeKuttaScheme, substep, Fⁿ, Fⁿ⁻¹, Uⁿ, Uⁿ⁻¹, cⁿ⁺¹, cⁿ)
-    if substep == 3
+function assemble_advective_dissipation!(P, grid, ts::RungeKuttaScheme, substep, Fⁿ, Fⁿ⁻¹, Uⁿ, Uⁿ⁻¹, cⁿ⁺¹, cⁿ)
+    if substep == length(ts.β)
         launch!(architecture(grid), grid, :xyz, _assemble_rk3_advective_dissipation!, P, grid, Fⁿ, Uⁿ, cⁿ⁺¹, cⁿ)
     end
     return nothing
@@ -71,8 +71,8 @@ end
 assemble_diffusive_dissipation!(K, grid, ts::QuasiAdamsBashforth2TimeStepper, substep, Vⁿ, Vⁿ⁻¹, cⁿ⁺¹, cⁿ) =
     launch!(architecture(grid), grid, :xyz, _assemble_ab2_diffusive_dissipation!, K, grid, ts.χ, Vⁿ, Vⁿ⁻¹, cⁿ⁺¹, cⁿ)
 
-function assemble_diffusive_dissipation!(K, grid, ::RungeKuttaScheme, substep, Vⁿ, Vⁿ⁻¹, cⁿ⁺¹, cⁿ)
-    if substep == 3
+function assemble_diffusive_dissipation!(K, grid, ts::RungeKuttaScheme, substep, Vⁿ, Vⁿ⁻¹, cⁿ⁺¹, cⁿ)
+    if substep == length(ts.β)
         launch!(architecture(grid), grid, :xyz, _assemble_rk3_diffusive_dissipation!, K, grid, Vⁿ, cⁿ⁺¹, cⁿ)
     end
     return nothing
diff --git a/src/Models/VarianceDissipationComputations/diffusive_dissipation.jl b/src/Models/VarianceDissipationComputations/diffusive_dissipation.jl
index 2bcb46f5fd..64b82b91ef 100644
--- a/src/Models/VarianceDissipationComputations/diffusive_dissipation.jl
+++ b/src/Models/VarianceDissipationComputations/diffusive_dissipation.jl
@@ -90,11 +90,11 @@ compute_diffusive_fluxes!(Vⁿ, i, j, k, grid, ::Nothing, K, b, c, c_id, clk, fi
 
 const etd = ExplicitTimeDiscretization()
 
-@inline function compute_diffusive_fluxes!(Vⁿ, i, j, k, grid, clo, K, b, c, c_id, clk, fields)
+@inline function compute_diffusive_fluxes!(Vⁿ, i, j, k, grid, clo, K, b, tracer, c_id, clk, fields)
     @inbounds begin
-        Vⁿ.x[i, j, k] += _diffusive_flux_x(i, j, k, grid, etd, clo, K, c_id, c, clk, fields, b) * Axᶠᶜᶜ(i, j, k, grid) * σⁿ(i, j, k, grid, f, c, c)
-        Vⁿ.y[i, j, k] += _diffusive_flux_y(i, j, k, grid, etd, clo, K, c_id, c, clk, fields, b) * Ayᶜᶠᶜ(i, j, k, grid) * σⁿ(i, j, k, grid, c, f, c)
-        Vⁿ.z[i, j, k] += _diffusive_flux_z(i, j, k, grid, etd, clo, K, c_id, c, clk, fields, b) * Azᶜᶜᶠ(i, j, k, grid) * σⁿ(i, j, k, grid, c, c, f)
+        Vⁿ.x[i, j, k] += _diffusive_flux_x(i, j, k, grid, etd, clo, K, c_id, tracer, clk, fields, b) * Axᶠᶜᶜ(i, j, k, grid) * σⁿ(i, j, k, grid, f, c, c)
+        Vⁿ.y[i, j, k] += _diffusive_flux_y(i, j, k, grid, etd, clo, K, c_id, tracer, clk, fields, b) * Ayᶜᶠᶜ(i, j, k, grid) * σⁿ(i, j, k, grid, c, f, c)
+        Vⁿ.z[i, j, k] += _diffusive_flux_z(i, j, k, grid, etd, clo, K, c_id, tracer, clk, fields, b) * Azᶜᶜᶠ(i, j, k, grid) * σⁿ(i, j, k, grid, c, c, f)
     end
     return nothing
 end
diff --git a/src/Models/VarianceDissipationComputations/flatten_dissipation_fields.jl b/src/Models/VarianceDissipationComputations/flatten_dissipation_fields.jl
index 8b8c55b675..c66ab2470e 100644
--- a/src/Models/VarianceDissipationComputations/flatten_dissipation_fields.jl
+++ b/src/Models/VarianceDissipationComputations/flatten_dissipation_fields.jl
@@ -5,7 +5,6 @@ Flatten the dissipation fields of a `VarianceDissipation` object into a named tu
 
 - The dissipation associated with the advection scheme in fields named `A-tracername-dir`
 - The dissipation associated with the closures in fields names `D-tracername-dir`
-- The squared gradients (necessary for computing an "effective diffusivity") in fields named `G-tracername-dir`
 """
 function flatten_dissipation_fields(t::VarianceDissipation) 
     A = t.advective_production
@@ -17,7 +16,6 @@ function flatten_dissipation_fields(t::VarianceDissipation)
 
     prod_names = Tuple(Symbol(:A, tracer_name, dir) for dir in dirs)
     diff_names = Tuple(Symbol(:D, tracer_name, dir) for dir in dirs)
-    grad_names = Tuple(Symbol(:G, tracer_name, dir) for dir in dirs)
 
     advective_prod = Tuple(getproperty(A, dir) for dir in dirs)
     diffusive_prod = Tuple(getproperty(D, dir) for dir in dirs)
diff --git a/src/Models/VarianceDissipationComputations/update_fluxes.jl b/src/Models/VarianceDissipationComputations/update_fluxes.jl
index 553c866b56..be0190f4bd 100644
--- a/src/Models/VarianceDissipationComputations/update_fluxes.jl
+++ b/src/Models/VarianceDissipationComputations/update_fluxes.jl
@@ -16,7 +16,6 @@ function cache_fluxes!(dissipation, model, tracer_name)
     stage = model.clock.stage
 
     update_transport!(Uⁿ, Uⁿ⁻¹, grid, params, timestepper, stage, U)
-
     tracer_id = findfirst(x -> x == tracer_name, keys(model.tracers))
     cache_fluxes!(dissipation, model, tracer_name, Val(tracer_id))
 
@@ -72,7 +71,7 @@ function cache_fluxes!(dissipation, model, tracer_name::Symbol, tracer_id)
 
     if timestepper isa QuasiAdamsBashforth2TimeStepper
         parent(cⁿ⁻¹) .= parent(c)
-    elseif (timestepper isa RungeKuttaScheme) && (stage == 3)
+    elseif (timestepper isa RungeKuttaScheme) && (stage == length(timestepper.β))
         parent(cⁿ⁻¹) .= parent(c)
     end
 
@@ -82,8 +81,8 @@ end
 cache_advective_fluxes!(Fⁿ, Fⁿ⁻¹, grid, params, ::QuasiAdamsBashforth2TimeStepper, stage, advection, U, c) =
     launch!(architecture(grid), grid, params, _cache_advective_fluxes!, Fⁿ, Fⁿ⁻¹, grid, advection, U, c)
 
-function cache_advective_fluxes!(Fⁿ, Fⁿ⁻¹, grid, params, ts::SplitRungeKutta3TimeStepper, stage, advection, U, c)
-    if stage == 2
+function cache_advective_fluxes!(Fⁿ, Fⁿ⁻¹, grid, params, ts::SplitRungeKuttaTimeStepper, stage, advection, U, c)
+    if stage == length(ts.β)-1
         launch!(architecture(grid), grid, params, _cache_advective_fluxes!, Fⁿ, grid, advection, U, c)
     end
 end
@@ -91,8 +90,8 @@ end
 cache_diffusive_fluxes(Vⁿ, Vⁿ⁻¹, grid, params, ::QuasiAdamsBashforth2TimeStepper, stage, clo, D, B, c, tracer_id, clk, model_fields) =
     launch!(architecture(grid), grid, params, _cache_diffusive_fluxes!, Vⁿ, Vⁿ⁻¹, grid, clo, D, B, c, tracer_id, clk, model_fields)
 
-function cache_diffusive_fluxes(Vⁿ, Vⁿ⁻¹, grid, params, ts::SplitRungeKutta3TimeStepper, stage, clo, D, B, c, tracer_id, clk, model_fields)
-    if stage == 2
+function cache_diffusive_fluxes(Vⁿ, Vⁿ⁻¹, grid, params, ts::SplitRungeKuttaTimeStepper, stage, clo, D, B, c, tracer_id, clk, model_fields)
+    if stage == length(ts.β)-1
         launch!(architecture(grid), grid, params, _cache_diffusive_fluxes!, Vⁿ, grid, clo, D, B, c, tracer_id, clk, model_fields)
     end
 end
@@ -100,8 +99,8 @@ end
 update_transport!(Uⁿ, Uⁿ⁻¹, grid, params, ::QuasiAdamsBashforth2TimeStepper, stage, U) =
     launch!(architecture(grid), grid, params, _update_transport!, Uⁿ, Uⁿ⁻¹, grid, U)
 
-function update_transport!(Uⁿ, Uⁿ⁻¹, grid, params, ts::SplitRungeKutta3TimeStepper, stage, U)
-    if stage == 2
+function update_transport!(Uⁿ, Uⁿ⁻¹, grid, params, ts::SplitRungeKuttaTimeStepper, stage, U)
+    if stage == length(ts.β)-1
         launch!(architecture(grid), grid, params, _update_transport!, Uⁿ, grid, U)
     end
 end
diff --git a/src/Models/interleave_communication_and_computation.jl b/src/Models/interleave_communication_and_computation.jl
index 53b11a8587..2f4017c378 100644
--- a/src/Models/interleave_communication_and_computation.jl
+++ b/src/Models/interleave_communication_and_computation.jl
@@ -66,3 +66,14 @@ function interior_tendency_kernel_parameters(arch::AsynchronousDistributed, grid
     return KernelParameters(sizes, offsets)
 end
 
+""" Kernel parameters for computing surface variables including halos. """
+@inline function surface_kernel_parameters(grid)
+    Nx, Ny, _ = size(grid)
+    Hx, Hy, _ = halo_size(grid)
+    Tx, Ty, _ = topology(grid)
+
+    ii = ifelse(Tx == Flat, 1:Nx, -Hx+2:Nx+Hx-1)
+    jj = ifelse(Ty == Flat, 1:Ny, -Hy+2:Ny+Hy-1)
+
+    return KernelParameters(ii, jj)
+end
diff --git a/src/MultiRegion/multi_region_field.jl b/src/MultiRegion/multi_region_field.jl
index 09040ec9f5..eee9937507 100644
--- a/src/MultiRegion/multi_region_field.jl
+++ b/src/MultiRegion/multi_region_field.jl
@@ -6,7 +6,11 @@ using Oceananigans.OutputWriters: output_indices
 
 using Base: @propagate_inbounds
 
-import Oceananigans.BoundaryConditions: regularize_field_boundary_conditions
+import Oceananigans.DistributedComputations: reconstruct_global_field, CommunicationBuffers
+import Oceananigans.BoundaryConditions: regularize_field_boundary_conditions, FieldBoundaryConditions
+import Oceananigans.Grids: xnodes, ynodes
+import Oceananigans.Fields: set!, compute!, compute_at!, interior, validate_field_data, validate_boundary_conditions
+import Oceananigans.Fields: validate_indices, communication_buffers
 import Oceananigans.Diagnostics: hasnan
 import Oceananigans.DistributedComputations: reconstruct_global_field, CommunicationBuffers
 import Oceananigans.Fields: set!, compute!, compute_at!, interior, communication_buffers,
@@ -185,8 +189,8 @@ function inject_regional_bcs(grid, connectivity, loc, indices;
                              top = default_auxiliary_bc(grid, Val(:top), loc),
                              immersed = NoFluxBoundaryCondition())
 
-    west  = inject_west_boundary(connectivity, west)
-    east  = inject_east_boundary(connectivity, east)
+    west  =  inject_west_boundary(connectivity, west)
+    east  =  inject_east_boundary(connectivity, east)
     south = inject_south_boundary(connectivity, south)
     north = inject_north_boundary(connectivity, north)
 
diff --git a/src/Oceananigans.jl b/src/Oceananigans.jl
index 02d9acf60b..633df6f158 100644
--- a/src/Oceananigans.jl
+++ b/src/Oceananigans.jl
@@ -118,6 +118,7 @@ export
     prettytime, apply_regionally!, construct_regionally, @apply_regionally, MultiRegionObject
 
 using DocStringExtensions
+using DoubleFloats
 using FFTW
 
 function __init__()
@@ -141,7 +142,7 @@ end
 # List of fully-supported floating point types where applicable.
 # Currently used only in the Advection module to specialize
 # reconstruction schemes (WENO, UpwindBiased, and Centered).
-const fully_supported_float_types = (Float32, Float64)
+const fully_supported_float_types = (Float32, Float64, BigFloat, Double64)
 
 #####
 ##### Default settings for constructors
diff --git a/src/OrthogonalSphericalShellGrids/distributed_tripolar_grid.jl b/src/OrthogonalSphericalShellGrids/distributed_tripolar_grid.jl
index 6a6d1c5fc8..d0361de01f 100644
--- a/src/OrthogonalSphericalShellGrids/distributed_tripolar_grid.jl
+++ b/src/OrthogonalSphericalShellGrids/distributed_tripolar_grid.jl
@@ -254,8 +254,7 @@ function regularize_field_boundary_conditions(bcs::FieldBoundaryConditions,
 
     elseif yrank == processor_size[2] - 1 && processor_size[1] != 1
         from = arch.local_rank
-        # Search the rank to send to
-        to = arch.connectivity.north
+        to   = arch.connectivity.north
         halo_communication = ZipperHaloCommunicationRanks(sign; from, to)
         DistributedCommunicationBoundaryCondition(halo_communication)
 
@@ -264,9 +263,8 @@ function regularize_field_boundary_conditions(bcs::FieldBoundaryConditions,
 
     end
 
-    bottom = regularize_boundary_condition(bcs.bottom, grid, loc, 3, LeftBoundary,  prognostic_names)
-    top =    regularize_boundary_condition(bcs.top,    grid, loc, 3, RightBoundary, prognostic_names)
-
+    bottom   = regularize_boundary_condition(bcs.bottom, grid, loc, 3, LeftBoundary,  prognostic_names)
+    top      = regularize_boundary_condition(bcs.top,    grid, loc, 3, RightBoundary, prognostic_names)
     immersed = regularize_immersed_boundary_condition(bcs.immersed, grid, loc, field_name, prognostic_names)
 
     return FieldBoundaryConditions(west, east, south, north, bottom, top, immersed)
diff --git a/src/OrthogonalSphericalShellGrids/distributed_zipper.jl b/src/OrthogonalSphericalShellGrids/distributed_zipper.jl
index 345a2b7bf3..af2741886b 100644
--- a/src/OrthogonalSphericalShellGrids/distributed_zipper.jl
+++ b/src/OrthogonalSphericalShellGrids/distributed_zipper.jl
@@ -8,7 +8,7 @@ using Oceananigans.DistributedComputations: cooperative_waitall!,
                                             loc_id,
                                             DCBCT
 
-using Oceananigans.Fields: location
+using Oceananigans.Fields: location, instantiated_location
 
 import Oceananigans.BoundaryConditions: fill_halo_regions!
 import Oceananigans.DistributedComputations: synchronize_communication!
@@ -104,9 +104,7 @@ function synchronize_communication!(field::Field{<:Any, <:Any, <:Any, <:Any, <:D
     recv_from_buffers!(field.data, field.communication_buffers, field.grid)
 
     north_bc = field.boundary_conditions.north
-    instantiated_location = map(instantiate, location(field))
-
-    switch_north_halos!(field, north_bc, field.grid, instantiated_location)
+    switch_north_halos!(field, north_bc, field.grid, instantiated_location(field))
 
     return nothing
 end
diff --git a/src/OutputReaders/field_time_series.jl b/src/OutputReaders/field_time_series.jl
index c3c9c1cac8..6dda9d4a57 100644
--- a/src/OutputReaders/field_time_series.jl
+++ b/src/OutputReaders/field_time_series.jl
@@ -448,6 +448,12 @@ end
 
 struct UnspecifiedBoundaryConditions end
 
+function naturalsort(x::Vector{String})
+    f = text -> all(isnumeric, text) ? Char(parse(Int, text)) : text
+    sorter = key -> join(f(m.match) for m in eachmatch(r"[0-9]+|[^0-9]+", key))
+    sort(x, by=sorter)
+end
+
 """
     FieldTimeSeries(path, name;
                     backend = InMemory(),
@@ -499,6 +505,7 @@ function FieldTimeSeries(path::String, name::String;
         # Look for part1, etc
         lookfor = string(start, "_part*.jld2")
         part_paths = glob(lookfor)
+        part_paths = naturalsort(part_paths)
         Nparts = length(part_paths)
         path = first(part_paths) # part1 is first?
     else
@@ -647,6 +654,7 @@ function FieldTimeSeries(path::String, name::String;
 
         for part in 2:Nparts
             path = part_paths[part]
+            @show path
             file = jldopen(path; reader_kw...)
             part_iterations = parse.(Int, keys(file["timeseries/t"]))
             part_times = [file["timeseries/t/$i"] for i in part_iterations]
diff --git a/src/OutputWriters/jld2_writer.jl b/src/OutputWriters/jld2_writer.jl
index fc233ca1ba..ba11295121 100644
--- a/src/OutputWriters/jld2_writer.jl
+++ b/src/OutputWriters/jld2_writer.jl
@@ -3,6 +3,7 @@ using JLD2
 using Oceananigans.Utils
 using Oceananigans.Utils: TimeInterval, prettykeys, materialize_schedule
 using Oceananigans.Fields: boundary_conditions, indices
+using Oceananigans.DistributedComputations: synchronize_communication!
 
 default_included_properties(model) = [:grid]
 
@@ -233,6 +234,11 @@ end
 
 function write_output!(writer::JLD2Writer, model)
 
+    # Synchronize model state if needed
+    for field in fields(model)
+        synchronize_communication!(field)
+    end
+
     verbose = writer.verbose
     current_iteration = model.clock.iteration
 
diff --git a/src/TimeSteppers/TimeSteppers.jl b/src/TimeSteppers/TimeSteppers.jl
index e025458f06..adae42849b 100644
--- a/src/TimeSteppers/TimeSteppers.jl
+++ b/src/TimeSteppers/TimeSteppers.jl
@@ -3,7 +3,7 @@ module TimeSteppers
 export
     QuasiAdamsBashforth2TimeStepper,
     RungeKutta3TimeStepper,
-    SplitRungeKutta3TimeStepper,
+    SplitRungeKuttaTimeStepper,
     time_step!,
     Clock,
     tendencies
@@ -24,9 +24,6 @@ function update_state! end
 function compute_tendencies! end
 function compute_flux_bc_tendencies! end
 
-compute_pressure_correction!(model, Δt) = nothing
-make_pressure_correction!(model, Δt) = nothing
-
 # Interface for time-stepping Lagrangian particles
 abstract type AbstractLagrangianParticles end
 step_lagrangian_particles!(model, Δt) = nothing
@@ -38,7 +35,7 @@ include("clock.jl")
 include("store_tendencies.jl")
 include("quasi_adams_bashforth_2.jl")
 include("runge_kutta_3.jl")
-include("split_hydrostatic_runge_kutta_3.jl")
+include("split_runge_kutta.jl")
 
 """
     TimeStepper(name::Symbol, args...; kwargs...)
@@ -64,8 +61,15 @@ TimeStepper(::Val{:QuasiAdamsBashforth2}, args...; kwargs...) =
 TimeStepper(::Val{:RungeKutta3}, args...; kwargs...) =
     RungeKutta3TimeStepper(args...; kwargs...)
 
-TimeStepper(::Val{:SplitRungeKutta3}, args...; kwargs...) =
-    SplitRungeKutta3TimeStepper(args...; kwargs...)
+# Convenience constructors for SplitRungeKuttaTimeStepper with 2 to 40 stages
+# By calling TimeStepper(:SplitRungeKuttaN, ...) 
+for stages in 1:40
+    @eval TimeStepper(::Val{Symbol(:SplitRungeKutta, $stages)}, args...; kwargs...) =
+              SplitRungeKuttaTimeStepper(args...; coefficients=tuple(collect($stages:-1:1)...), kwargs...)
+end
+
+TimeStepper(ts::SplitRungeKuttaTimeStepper, grid, prognostic_fields; kw...) = 
+    SplitRungeKuttaTimeStepper(grid, prognostic_fields; coefficients=ts.β, kw...)
 
 function first_time_step!(model::AbstractModel, Δt)
     initialize!(model)
diff --git a/src/TimeSteppers/quasi_adams_bashforth_2.jl b/src/TimeSteppers/quasi_adams_bashforth_2.jl
index b5e7b3efb9..7ef56936f0 100644
--- a/src/TimeSteppers/quasi_adams_bashforth_2.jl
+++ b/src/TimeSteppers/quasi_adams_bashforth_2.jl
@@ -76,9 +76,6 @@ function time_step!(model::AbstractModel{<:QuasiAdamsBashforth2TimeStepper}, Δt
 
     Δt == 0 && @warn "Δt == 0 may cause model blowup!"
 
-    # Be paranoid and update state at iteration 0
-    model.clock.iteration == 0 && update_state!(model, callbacks; compute_tendencies=true)
-
     # Take an euler step if:
     #   * We detect that the time-step size has changed.
     #   * We detect that this is the "first" time-step, which means we
@@ -88,6 +85,10 @@ function time_step!(model::AbstractModel{<:QuasiAdamsBashforth2TimeStepper}, Δt
     euler = euler || (Δt != model.clock.last_Δt)
     euler && @debug "Taking a forward Euler step."
 
+    if model.clock.iteration == 0
+        update_state!(model, callbacks)
+    end
+    
     # If euler, then set χ = -0.5
     minus_point_five = convert(eltype(model.grid), -0.5)
     ab2_timestepper = model.timestepper
@@ -95,16 +96,12 @@ function time_step!(model::AbstractModel{<:QuasiAdamsBashforth2TimeStepper}, Δt
     χ₀ = ab2_timestepper.χ # Save initial value
     ab2_timestepper.χ = χ
 
-    # Full step for tracers, fractional step for velocities.
-    compute_flux_bc_tendencies!(model)
-    ab2_step!(model, Δt)
+    ab2_step!(model, Δt, callbacks)
+    cache_previous_tendencies!(model)
+    update_state!(model, callbacks)
 
     tick!(model.clock, Δt)
 
-    compute_pressure_correction!(model, Δt)
-    @apply_regionally correct_velocities_and_cache_previous_tendencies!(model, Δt)
-
-    update_state!(model, callbacks; compute_tendencies=true)
     step_lagrangian_particles!(model, Δt)
 
     # Return χ to initial value
@@ -113,48 +110,8 @@ function time_step!(model::AbstractModel{<:QuasiAdamsBashforth2TimeStepper}, Δt
     return nothing
 end
 
-function correct_velocities_and_cache_previous_tendencies!(model, Δt)
-    make_pressure_correction!(model, Δt)
-    cache_previous_tendencies!(model)
-    return nothing
-end
-
-#####
-##### Time stepping in each step
-#####
-
-""" Generic implementation. """
-function ab2_step!(model, Δt)
-    grid = model.grid
-    FT = eltype(grid)
-    arch = architecture(grid)
-    model_fields = prognostic_fields(model)
-    χ = model.timestepper.χ
-    Δt = convert(FT, Δt)
-    χ = convert(FT, χ)
-
-    for (i, field) in enumerate(model_fields)
-        kernel_args = (field, Δt, χ, model.timestepper.Gⁿ[i], model.timestepper.G⁻[i])
-        launch!(arch, grid, :xyz, ab2_step_field!, kernel_args...; exclude_periphery=true)
-
-        # TODO: function tracer_index(model, field_index) = field_index - 3, etc...
-        tracer_index = Val(i - 3) # assumption
-
-        implicit_step!(field,
-                       model.timestepper.implicit_solver,
-                       model.closure,
-                       model.diffusivity_fields,
-                       tracer_index,
-                       model.clock,
-                       fields(model),
-                       Δt)
-    end
-
-    return nothing
-end
-
 """
-Time step velocity fields via the 2nd-order quasi Adams-Bashforth method
+Time step fields via the 2nd-order quasi Adams-Bashforth method
 
     `U^{n+1} = U^n + Δt ((3/2 + χ) * G^{n} - (1/2 + χ) G^{n-1})`
 
@@ -175,3 +132,10 @@ Time step velocity fields via the 2nd-order quasi Adams-Bashforth method
 end
 
 @kernel ab2_step_field!(::FunctionField, Δt, χ, Gⁿ, G⁻) = nothing
+
+#####
+##### These functions need to be implemented by every model independently
+#####
+
+ab2_step!(model::AbstractModel, Δt, callbacks) = error("ab2_step! not implemented for $(typeof(model))")
+cache_previous_tendencies!(model::AbstractModel) = error("cache_previous_tendencies! not implemented for $(typeof(model))")
diff --git a/src/TimeSteppers/runge_kutta_3.jl b/src/TimeSteppers/runge_kutta_3.jl
index 7771b451f5..df22cbfdea 100644
--- a/src/TimeSteppers/runge_kutta_3.jl
+++ b/src/TimeSteppers/runge_kutta_3.jl
@@ -94,7 +94,7 @@ function time_step!(model::AbstractModel{<:RungeKutta3TimeStepper}, Δt; callbac
     Δt == 0 && @warn "Δt == 0 may cause model blowup!"
 
     # Be paranoid and update state at iteration 0, in case run! is not used:
-    model.clock.iteration == 0 && update_state!(model, callbacks; compute_tendencies = true)
+    model.clock.iteration == 0 && update_state!(model, callbacks)
 
     γ¹ = model.timestepper.γ¹
     γ² = model.timestepper.γ²
@@ -115,40 +115,32 @@ function time_step!(model::AbstractModel{<:RungeKutta3TimeStepper}, Δt; callbac
     # First stage
     #
 
-    compute_flux_bc_tendencies!(model)
-    rk3_substep!(model, Δt, γ¹, nothing)
+    rk3_substep!(model, Δt, γ¹, nothing, callbacks)
+    cache_previous_tendencies!(model)
 
     tick!(model.clock, first_stage_Δt; stage=true)
 
-    compute_pressure_correction!(model, first_stage_Δt)
-    make_pressure_correction!(model, first_stage_Δt)
-
-    cache_previous_tendencies!(model)
-    update_state!(model, callbacks; compute_tendencies = true)
+    update_state!(model, callbacks)
     step_lagrangian_particles!(model, first_stage_Δt)
 
     #
     # Second stage
     #
 
-    compute_flux_bc_tendencies!(model)
-    rk3_substep!(model, Δt, γ², ζ²)
+    rk3_substep!(model, Δt, γ², ζ², callbacks)
+    cache_previous_tendencies!(model)
 
     tick!(model.clock, second_stage_Δt; stage=true)
 
-    compute_pressure_correction!(model, second_stage_Δt)
-    make_pressure_correction!(model, second_stage_Δt)
-
-    cache_previous_tendencies!(model)
-    update_state!(model, callbacks; compute_tendencies = true)
+    update_state!(model, callbacks)
     step_lagrangian_particles!(model, second_stage_Δt)
 
     #
     # Third stage
     #
 
-    compute_flux_bc_tendencies!(model)
-    rk3_substep!(model, Δt, γ³, ζ³)
+    rk3_substep!(model, Δt, γ³, ζ³, callbacks)
+    cache_previous_tendencies!(model)
 
     # This adjustment of the final time-step reduces the accumulation of
     # round-off error when Δt is added to model.clock.time. Note that we still use
@@ -160,15 +152,15 @@ function time_step!(model::AbstractModel{<:RungeKutta3TimeStepper}, Δt; callbac
     model.clock.last_stage_Δt = corrected_third_stage_Δt
     model.clock.last_Δt = Δt
 
-    compute_pressure_correction!(model, third_stage_Δt)
-    make_pressure_correction!(model, third_stage_Δt)
-
-    update_state!(model, callbacks; compute_tendencies = true)
+    update_state!(model, callbacks)
     step_lagrangian_particles!(model, third_stage_Δt)
 
     return nothing
 end
 
+# Needs to be implemented by every model independently
+rk3_substep!(model::AbstractModel, Δt, γⁿ, ζⁿ, callbacks) = error("rk3_substep! not implemented for $(typeof(model))")
+
 #####
 ##### Time stepping in each substep
 #####
@@ -176,32 +168,6 @@ end
 stage_Δt(Δt, γⁿ, ζⁿ) = Δt * (γⁿ + ζⁿ)
 stage_Δt(Δt, γⁿ, ::Nothing) = Δt * γⁿ
 
-function rk3_substep!(model, Δt, γⁿ, ζⁿ)
-
-    grid = model.grid
-    arch = architecture(grid)
-    model_fields = prognostic_fields(model)
-
-    for (i, field) in enumerate(model_fields)
-        kernel_args = (field, Δt, γⁿ, ζⁿ, model.timestepper.Gⁿ[i], model.timestepper.G⁻[i])
-        launch!(arch, grid, :xyz, rk3_substep_field!, kernel_args...; exclude_periphery=true)
-
-        # TODO: function tracer_index(model, field_index) = field_index - 3, etc...
-        tracer_index = Val(i - 3) # assumption
-
-        implicit_step!(field,
-                       model.timestepper.implicit_solver,
-                       model.closure,
-                       model.diffusivity_fields,
-                       tracer_index,
-                       model.clock,
-                       fields(model),
-                       stage_Δt(Δt, γⁿ, ζⁿ))
-    end
-
-    return nothing
-end
-
 """
 Time step velocity fields via the 3rd-order Runge-Kutta method
 
@@ -211,16 +177,10 @@ where `m` denotes the substage.
 """
 @kernel function rk3_substep_field!(U, Δt, γⁿ::FT, ζⁿ, Gⁿ, G⁻) where FT
     i, j, k = @index(Global, NTuple)
-
-    @inbounds begin
-        U[i, j, k] += convert(FT, Δt) * (γⁿ * Gⁿ[i, j, k] + ζⁿ * G⁻[i, j, k])
-    end
+    @inbounds U[i, j, k] += convert(FT, Δt) * (γⁿ * Gⁿ[i, j, k] + ζⁿ * G⁻[i, j, k])
 end
 
 @kernel function rk3_substep_field!(U, Δt, γ¹::FT, ::Nothing, G¹, G⁰) where FT
     i, j, k = @index(Global, NTuple)
-
-    @inbounds begin
-        U[i, j, k] += convert(FT, Δt) * γ¹ * G¹[i, j, k]
-    end
+    @inbounds U[i, j, k] += convert(FT, Δt) * γ¹ * G¹[i, j, k]
 end
diff --git a/src/TimeSteppers/split_hydrostatic_runge_kutta_3.jl b/src/TimeSteppers/split_hydrostatic_runge_kutta_3.jl
deleted file mode 100644
index cfa5e7b951..0000000000
--- a/src/TimeSteppers/split_hydrostatic_runge_kutta_3.jl
+++ /dev/null
@@ -1,168 +0,0 @@
-using Oceananigans.Architectures: architecture
-using Oceananigans: fields
-
-"""
-    SplitRungeKutta3TimeStepper{FT, TG, PF, TI} <: AbstractTimeStepper
-
-Hold parameters and tendency fields for a low storage, third-order Runge-Kutta-Wray
-time-stepping scheme described by [Lan et al. (2022)](@cite Lan2022).
-"""
-struct SplitRungeKutta3TimeStepper{FT, TG, PF, TI} <: AbstractTimeStepper
-    β¹ :: FT
-    β² :: FT
-    Gⁿ :: TG
-    Ψ⁻ :: PF # prognostic state at the previous timestep
-    implicit_solver :: TI
-end
-
-"""
-    SplitRungeKutta3TimeStepper(grid, prognostic_fields, args...;
-                                implicit_solver::TI = nothing,
-                                Gⁿ::TG = map(similar, prognostic_fields),
-                                Ψ⁻::PF = map(similar, prognostic_fields),
-                                kwargs...) where {TI, TG, PF}
-
-Return a 3rd-order `SplitRungeKutta3TimeStepper` on `grid` and with `tracers`.
-The tendency fields `Gⁿ` and `G⁻`, and the previous state `Ψ⁻` can be modified
-via optional `kwargs`.
-
-The scheme is described by [Knoth and Wensch (2014)](@cite knoth2014). In a nutshell,
-the 3rd-order Runge-Kutta timestepper steps forward the state `Uⁿ` by `Δt` via 3 substeps.
-A barotropic velocity correction step is applied after at each substep.
-
-The state `U` after each substep `m` is equivalent to an Euler step with a modified time step:
-
-```julia
-Δt̃   = Δt / βᵐ
-Uᵐ⁺¹ = Uⁿ + Δt̃ * Gᵐ
-```
-
-where `Uᵐ` is the state at the ``m``-th substep, `Uⁿ` is the state at the ``n``-th timestep,
-`Gᵐ` is the tendency at the ``m``-th substep, and constants `β¹ = 3`, `β² = 2`, `β³ = 1`.
-
-The state at the first substep is taken to be the one that corresponds to the ``n``-th timestep,
-`U¹ = Uⁿ`, and the state after the third substep is then the state at the `Uⁿ⁺¹ = U³`.
-
-References
-==========
-
-Knoth, O., and Wensch, J. (2014). Generalized Split-Explicit Runge-Kutta methods for the
-    compressible Euler equations. Monthly Weather Review, 142, 2067-2081,
-    https://doi.org/10.1175/MWR-D-13-00068.1.
-"""
-function SplitRungeKutta3TimeStepper(grid, prognostic_fields, args...;
-                                     implicit_solver::TI = nothing,
-                                     Gⁿ::TG = map(similar, prognostic_fields),
-                                     Ψ⁻::PF = map(similar, prognostic_fields),
-                                     kwargs...) where {TI, TG, PF}
-
-    @warn("Split barotropic-baroclinic time stepping with SplitRungeKutta3TimeStepper is experimental.\n" *
-          "Use at own risk, and report any issues encountered at [https://github.com/CliMA/Oceananigans.jl/issues](https://github.com/CliMA/Oceananigans.jl/issues).")
-
-    FT = eltype(grid)
-    β¹ = 3
-    β² = 2
-
-    return SplitRungeKutta3TimeStepper{FT, TG, PF, TI}(β¹, β², Gⁿ, Ψ⁻, implicit_solver)
-end
-
-function time_step!(model::AbstractModel{<:SplitRungeKutta3TimeStepper}, Δt; callbacks=[])
-    Δt == 0 && @warn "Δt == 0 may cause model blowup!"
-
-    # Be paranoid and update state at iteration 0, in case run! is not used:
-    model.clock.iteration == 0 && update_state!(model, callbacks; compute_tendencies = true)
-
-    cache_previous_fields!(model)
-    β¹ = model.timestepper.β¹
-    β² = model.timestepper.β²
-
-    ####
-    #### First stage
-    ####
-
-    # First stage: n -> n + 1/3
-    model.clock.stage = 1
-
-    compute_flux_bc_tendencies!(model)
-    split_rk3_substep!(model, Δt / β¹)
-    compute_pressure_correction!(model, Δt / β¹)
-    make_pressure_correction!(model, Δt / β¹)
-    update_state!(model, callbacks; compute_tendencies = true)
-
-    ####
-    #### Second stage
-    ####
-
-    # Second stage: n -> n + 1/2
-    model.clock.stage = 2
-
-    compute_flux_bc_tendencies!(model)
-    split_rk3_substep!(model, Δt / β²)
-    compute_pressure_correction!(model, Δt / β²)
-    make_pressure_correction!(model, Δt / β²)
-    update_state!(model, callbacks; compute_tendencies = true)
-
-    ####
-    #### Third stage
-    ####
-
-    # Third stage: n -> n + 1
-    model.clock.stage = 3
-
-    compute_flux_bc_tendencies!(model)
-    split_rk3_substep!(model, Δt)
-    compute_pressure_correction!(model, Δt)
-    make_pressure_correction!(model, Δt)
-    update_state!(model, callbacks; compute_tendencies = true)
-
-    step_lagrangian_particles!(model, Δt)
-
-    tick!(model.clock, Δt)
-
-    return nothing
-end
-
-@kernel function _euler_substep_field!(field, Δt, Gⁿ, Ψ⁻)
-    i, j, k = @index(Global, NTuple)
-    @inbounds field[i, j, k] = Ψ⁻[i, j, k] + Δt * Gⁿ[i, j, k]
-end
-
-function split_rk3_substep!(model, Δt)
-
-    grid = model.grid
-    FT   = eltype(grid)
-    arch = architecture(grid)
-    model_fields = prognostic_fields(model)
-
-    for (i, field) in enumerate(model_fields)
-        Ψ⁻ = model.timestepper.Ψ⁻[i]
-        Gⁿ = model.timestepper.Gⁿ[i]
-        launch!(arch, grid, :xyz, _euler_substep_field!, field, convert(FT, Δt), Gⁿ, Ψ⁻)
-
-        # TODO: function tracer_index(model, field_index) = field_index - 3, etc...
-        tracer_index = Val(i - 3) # assumption
-
-        implicit_step!(field,
-                       model.timestepper.implicit_solver,
-                       model.closure,
-                       model.diffusivity_fields,
-                       tracer_index,
-                       model.clock,
-                       fields(model),
-                       Δt)
-    end
-end
-
-function cache_previous_fields!(model)
-
-    previous_fields = model.timestepper.Ψ⁻
-    model_fields = prognostic_fields(model)
-
-    for name in keys(previous_fields)
-        if !isnothing(previous_fields[name])
-            parent(previous_fields[name]) .= parent(model_fields[name]) # Storing also the halos
-        end
-    end
-
-    return nothing
-end
diff --git a/src/TimeSteppers/split_runge_kutta.jl b/src/TimeSteppers/split_runge_kutta.jl
new file mode 100644
index 0000000000..235214e003
--- /dev/null
+++ b/src/TimeSteppers/split_runge_kutta.jl
@@ -0,0 +1,127 @@
+using Oceananigans.Architectures: architecture
+using Oceananigans: fields
+
+"""
+    SplitRungeKuttaTimeStepper{FT, TG, PF, TI} <: AbstractTimeStepper
+
+Hold parameters and tendency fields for a low storage, nth-order Runge-Kutta time-stepping scheme
+"""
+struct SplitRungeKuttaTimeStepper{B, TG, PF, TI} <: AbstractTimeStepper
+    β  :: B
+    Gⁿ :: TG
+    Ψ⁻ :: PF # prognostic state at the previous timestep
+    implicit_solver :: TI
+end
+
+"""
+    SplitRungeKuttaTimeStepper(grid, prognostic_fields, args...;
+                               implicit_solver::TI = nothing,
+                               Gⁿ::TG = map(similar, prognostic_fields),
+                               Ψ⁻::PF = map(similar, prognostic_fields),
+                               kwargs...) where {TI, TG, PF}
+
+Return a nth-order `SplitRungeKuttaTimeStepper` on `grid` and with `tracers`.
+The tendency fields `Gⁿ` and `G⁻`, and the previous state `Ψ⁻` can be modified
+via optional `kwargs`.
+
+The scheme is described by [Knoth and Wensch (2014)](@cite knoth2014). In a nutshell,
+the nth-order low-storage Runge-Kutta timestepper steps forward the state `Uⁿ` by `Δt` via n substeps.
+A barotropic velocity correction step is applied after at each substep.
+
+The state `U` after each substep `m` is equivalent to an Euler step with a modified time step:
+
+```julia
+Δt̃   = Δt / βᵐ
+Uᵐ⁺¹ = Uⁿ + Δt̃ * Gᵐ
+```
+
+where `Uᵐ` is the state at the ``m``-th substep, `Uⁿ` is the state at the ``n``-th timestep,
+`Gᵐ` is the tendency at the ``m``-th substep. The coefficients `β` can be specified by the user,
+and default to `(3, 2, 1)` for a three-stage scheme. The number of stages is inferred from the length of the
+`β` tuple.
+
+The state at the first substep is taken to be the one that corresponds to the ``n``-th timestep,
+`U¹ = Uⁿ`, and the state after the third substep is then the state at the `Uⁿ⁺¹ = U³`.
+
+References
+==========
+
+Knoth, O., and Wensch, J. (2014). Generalized Split-Explicit Runge-Kutta methods for the
+    compressible Euler equations. Monthly Weather Review, 142, 2067-2081,
+    https://doi.org/10.1175/MWR-D-13-00068.1.
+"""
+function SplitRungeKuttaTimeStepper(grid, prognostic_fields, args...;
+                                    implicit_solver::TI = nothing,
+                                    coefficients = (3, 2, 1),
+                                    Gⁿ::TG = map(similar, prognostic_fields),
+                                    Ψ⁻::PF = map(similar, prognostic_fields),
+                                    kwargs...) where {TI, TG, PF}
+
+    @warn("Split barotropic-baroclinic time stepping with SplitRungeKuttaTimeStepper is experimental.\n" *
+          "Use at own risk, and report any issues encountered at [https://github.com/CliMA/Oceananigans.jl/issues](https://github.com/CliMA/Oceananigans.jl/issues).")
+
+    return SplitRungeKuttaTimeStepper{typeof(coefficients), TG, PF, TI}(coefficients, Gⁿ, Ψ⁻, implicit_solver)
+end
+
+# Simple constructor that only requires only the coefficients or the number of stages
+function SplitRungeKuttaTimeStepper(; coefficients = nothing, stages = nothing) 
+    if coefficients !== nothing && stages !== nothing
+        error("Cannot specify both `coefficients` and `stages`.")
+    end
+    if coefficients == nothing 
+        coefficients = tuple(collect(stages:-1:1)...)
+    end
+    return SplitRungeKuttaTimeStepper{typeof(coefficients), Nothing, Nothing, Nothing}(coefficients, nothing, nothing, nothing)
+end
+
+# Utility to compute low-storage coefficients from spectral coefficients. This is
+# useful to minimize dispersion and dissipation errors:
+# see Hu et al., Low-Dissipation and Low-Dispersion Runge–Kutta Schemes for Computational Acoustics, 1996
+function spectral_coefficients(c::AbstractVector)
+    N = length(c)
+    b = similar(c)
+    for i in 1:N-1
+        b[i] = c[N - i] / c[N - i + 1] 
+    end
+    b[end] = 1
+    return tuple(b...)
+end
+
+function time_step!(model::AbstractModel{<:SplitRungeKuttaTimeStepper}, Δt; callbacks=[])
+
+    if model.clock.iteration == 0
+        update_state!(model, callbacks)
+    end
+
+    cache_previous_fields!(model)
+    grid = model.grid
+
+    ####
+    #### Loop over the stages
+    ####
+
+    for (stage, β) in enumerate(model.timestepper.β)
+        # Update the clock stage
+        model.clock.stage = stage
+        
+        # Perform the substep
+        Δτ = Δt / β
+        rk_substep!(model, Δτ, callbacks)
+
+        # Update the state
+        update_state!(model, callbacks)
+    end
+    
+    # Finalize step
+    step_lagrangian_particles!(model, Δt)
+    tick!(model.clock, Δt)
+
+    return nothing
+end
+
+#####
+##### These functions need to be implemented by every model independently
+#####
+
+rk_substep!(model::AbstractModel, Δt, callbacks) = error("rk_substep! not implemented for $(typeof(model))")
+cache_previous_fields!(model::AbstractModel) = error("cache_previous_fields! not implemented for $(typeof(model))")
diff --git a/src/TimeSteppers/store_tendencies.jl b/src/TimeSteppers/store_tendencies.jl
index 03f4f520cd..e69de29bb2 100644
--- a/src/TimeSteppers/store_tendencies.jl
+++ b/src/TimeSteppers/store_tendencies.jl
@@ -1,22 +0,0 @@
-using Oceananigans: prognostic_fields
-using Oceananigans.Grids: AbstractGrid
-using Oceananigans.Utils: launch!
-
-""" Store source terms for `u`, `v`, and `w`. """
-@kernel function _cache_field_tendencies!(G⁻, G⁰)
-    i, j, k = @index(Global, NTuple)
-    @inbounds G⁻[i, j, k] = G⁰[i, j, k]
-end
-
-""" Store previous source terms before updating them. """
-function cache_previous_tendencies!(model)
-    model_fields = prognostic_fields(model)
-
-    for field_name in keys(model_fields)
-        launch!(model.architecture, model.grid, :xyz, _cache_field_tendencies!,
-                model.timestepper.G⁻[field_name],
-                model.timestepper.Gⁿ[field_name])
-    end
-
-    return nothing
-end
diff --git a/src/TurbulenceClosures/turbulence_closure_implementations/TKEBasedVerticalDiffusivities/time_step_catke_equation.jl b/src/TurbulenceClosures/turbulence_closure_implementations/TKEBasedVerticalDiffusivities/time_step_catke_equation.jl
index 79369d6541..8dbc2ad59b 100644
--- a/src/TurbulenceClosures/turbulence_closure_implementations/TKEBasedVerticalDiffusivities/time_step_catke_equation.jl
+++ b/src/TurbulenceClosures/turbulence_closure_implementations/TKEBasedVerticalDiffusivities/time_step_catke_equation.jl
@@ -5,7 +5,7 @@ using Oceananigans.Fields: immersed_boundary_condition
 using Oceananigans.Grids: get_active_cells_map, bottommost_active_node
 using Oceananigans.BoundaryConditions: compute_x_bcs!, compute_y_bcs!, compute_z_bcs!
 using Oceananigans.TimeSteppers: ab2_step_field!, implicit_step!
-using Oceananigans.TimeSteppers: QuasiAdamsBashforth2TimeStepper, SplitRungeKutta3TimeStepper
+using Oceananigans.TimeSteppers: QuasiAdamsBashforth2TimeStepper, SplitRungeKuttaTimeStepper
 using Oceananigans.TurbulenceClosures: ∇_dot_qᶜ, immersed_∇_dot_qᶜ, hydrostatic_turbulent_kinetic_energy_tendency
 
 get_time_step(closure::CATKEVerticalDiffusivity) = closure.tke_time_step
@@ -87,7 +87,7 @@ end
                                 stage == 2 ? (ts.γ², ts.ζ²) :
                                              (ts.γ³, ts.ζ³) 
                                 
-function time_step_catke_equation!(model, ::SplitRungeKutta3TimeStepper)
+function time_step_catke_equation!(model, ::SplitRungeKuttaTimeStepper)
 
     # TODO: properly handle closure tuples
     if model.closure isa Tuple
@@ -110,8 +110,7 @@ function time_step_catke_equation!(model, ::SplitRungeKutta3TimeStepper)
     tracer_index = findfirst(k -> k == :e, keys(model.tracers))
     implicit_solver = model.timestepper.implicit_solver
 
-    β  = model.clock.stage == 1 ? model.timestepper.β¹ :
-         model.clock.stage == 2 ? model.timestepper.β² : 1
+    β  = model.timestepper.β[model.clock.stage]  # Get the correct β value for the current stage
     Δt = model.clock.last_Δt / β
 
     # Compute the linear implicit component of the RHS (diffusivities, L)...
diff --git a/test/dependencies_for_poisson_solvers.jl b/test/dependencies_for_poisson_solvers.jl
index c6d01b2be6..2f46d2c25c 100644
--- a/test/dependencies_for_poisson_solvers.jl
+++ b/test/dependencies_for_poisson_solvers.jl
@@ -93,7 +93,7 @@ function random_divergence_free_source_term(grid)
 
     arch = architecture(grid)
 
-    compute_w_from_continuity!(U, arch, grid)
+    compute_w_from_continuity!(U, grid)
     fill_halo_regions!(Rw)
 
     # Compute the right hand side R = ∇⋅U
diff --git a/test/regression_tests/hydrostatic_free_turbulence_regression_test.jl b/test/regression_tests/hydrostatic_free_turbulence_regression_test.jl
index a7425db137..8fd764604a 100644
--- a/test/regression_tests/hydrostatic_free_turbulence_regression_test.jl
+++ b/test/regression_tests/hydrostatic_free_turbulence_regression_test.jl
@@ -36,6 +36,7 @@ function run_hydrostatic_free_turbulence_regression_test(grid, free_surface; reg
     model = HydrostaticFreeSurfaceModel(; grid, coriolis,
                                         momentum_advection = VectorInvariant(),
                                         free_surface = free_surface,
+                                        timestepper = :QuasiAdamsBashforth2, 
                                         closure = HorizontalScalarDiffusivity(ν=1e+5, κ=1e+4))
 
     #####
diff --git a/test/regression_tests/ocean_large_eddy_simulation_regression_test.jl b/test/regression_tests/ocean_large_eddy_simulation_regression_test.jl
index dd1b78478e..4a599647ff 100644
--- a/test/regression_tests/ocean_large_eddy_simulation_regression_test.jl
+++ b/test/regression_tests/ocean_large_eddy_simulation_regression_test.jl
@@ -131,7 +131,7 @@ function run_ocean_large_eddy_simulation_regression_test(arch, grid_type, closur
     model.clock.time = spinup_steps * Δt
     model.clock.iteration = spinup_steps
 
-    update_state!(model; compute_tendencies = true)
+    update_state!(model)
     model.clock.last_Δt = Δt
 
     for n in 1:test_steps
diff --git a/test/runtests.jl b/test/runtests.jl
index 6c7c2dd450..c3deed9678 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -189,6 +189,7 @@ CUDA.allowscalar() do
         archs = test_architectures()
         include("test_hydrostatic_regression.jl")
         include("test_distributed_hydrostatic_model.jl")
+        # include("test_zstar_conservation.jl")
     end
 
     # if group == :distributed_output || group == :all
@@ -222,6 +223,7 @@ CUDA.allowscalar() do
     if group == :vertical_coordinate || group == :all
         @testset "Vertical coordinate tests" begin
             include("test_zstar_coordinate.jl")
+            include("test_zstar_conservation.jl")
         end
     end
 
@@ -232,7 +234,6 @@ CUDA.allowscalar() do
         end
     end
 
-
     # Tests for Enzyme extension
     if group == :enzyme || group == :all
         @testset "Enzyme extension tests" begin
diff --git a/test/test_boundary_conditions_integration.jl b/test/test_boundary_conditions_integration.jl
index 2253dff352..9a2d461f88 100644
--- a/test/test_boundary_conditions_integration.jl
+++ b/test/test_boundary_conditions_integration.jl
@@ -105,8 +105,6 @@ function fluxes_with_diffusivity_boundary_conditions_are_correct(arch, FT)
     return isapprox(mean(b) - mean_b₀, flux * model.clock.time / Lz, atol=1e-6)
 end
 
-
-
 end_position(::Val{1}, grid) = (grid.Nx+1, 1, 1)
 end_position(::Val{2}, grid) = (1, grid.Ny+1, 1)
 end_position(::Val{3}, grid) = (1, 1, grid.Nz+1)
diff --git a/test/test_conjugate_gradient_poisson_solver.jl b/test/test_conjugate_gradient_poisson_solver.jl
index 600ce7cc0d..59cc7440be 100644
--- a/test/test_conjugate_gradient_poisson_solver.jl
+++ b/test/test_conjugate_gradient_poisson_solver.jl
@@ -2,7 +2,7 @@ include("dependencies_for_runtests.jl")
 include("dependencies_for_poisson_solvers.jl")
 
 using Oceananigans.Solvers: fft_poisson_solver, ConjugateGradientPoissonSolver, DiagonallyDominantPreconditioner
-using Oceananigans.TimeSteppers: compute_pressure_correction!
+using Oceananigans.Models.NonhydrostaticModels: compute_pressure_correction!
 using Oceananigans.Grids: XYZRegularRG
 using LinearAlgebra: norm
 using Random: seed!
diff --git a/test/test_distributed_architectures.jl b/test/test_distributed_architectures.jl
index 08303a71a4..601591e16a 100644
--- a/test/test_distributed_architectures.jl
+++ b/test/test_distributed_architectures.jl
@@ -80,4 +80,3 @@ end
     end
 end
 =#
-
diff --git a/test/test_distributed_hydrostatic_model.jl b/test/test_distributed_hydrostatic_model.jl
index bf9d4a4366..d7118ba79a 100644
--- a/test/test_distributed_hydrostatic_model.jl
+++ b/test/test_distributed_hydrostatic_model.jl
@@ -33,7 +33,7 @@ end
 
 @inline Gaussian(x, y, L) = exp(-(x^2 + y^2) / L^2)
 
-function rotation_with_shear_test(grid, closure=nothing)
+function rotation_with_shear_test(grid, closure=nothing; timestepper=:QuasiAdamsBashforth2)
 
     free_surface = SplitExplicitFreeSurface(grid; substeps = 8, gravitational_acceleration = 1)
     coriolis     = HydrostaticSphericalCoriolis(rotation_rate = 1)
@@ -50,6 +50,7 @@ function rotation_with_shear_test(grid, closure=nothing)
                                         coriolis = coriolis,
                                         closure,
                                         tracers,
+                                        timestepper,
                                         tracer_advection = WENO(order=3),
                                         buoyancy = BuoyancyTracer())
 
@@ -111,51 +112,46 @@ for arch in archs
 
             for (grid, global_grid) in zip((underlying_grid, immersed_grid, immersed_active_grid),
                                            (global_underlying_grid, global_immersed_grid, global_immersed_grid))
-                if arch.local_rank == 0
-                    @info "  Testing distributed solid body rotation with $(ranks(arch)) ranks on $(typeof(grid).name.wrapper)"
+                
+                for timestepper in (:QuasiAdamsBashforth2, :SplitRungeKutta3, :SplitRungeKutta5)
+                    @root @info "  Testing distributed solid body rotation with $(ranks(arch)) ranks on $(typeof(grid).name.wrapper) on $(timestepper)"
+                    
+                    # "s" for "serial" computation, "p" for parallel
+                    ms = rotation_with_shear_test(global_grid; timestepper)
+                    mp = rotation_with_shear_test(grid; timestepper)
+
+                    us = interior(on_architecture(CPU(), ms.velocities.u))
+                    vs = interior(on_architecture(CPU(), ms.velocities.v))
+                    cs = interior(on_architecture(CPU(), ms.tracers.c))
+                    ηs = interior(on_architecture(CPU(), ms.free_surface.η))
+
+                    cpu_arch = cpu_architecture(arch)
+
+                    up = interior(on_architecture(cpu_arch, mp.velocities.u))
+                    vp = interior(on_architecture(cpu_arch, mp.velocities.v))
+                    cp = interior(on_architecture(cpu_arch, mp.tracers.c))
+                    ηp = interior(on_architecture(cpu_arch, mp.free_surface.η))
+
+                    us = partition(us, cpu_arch, size(up))
+                    vs = partition(vs, cpu_arch, size(vp))
+                    cs = partition(cs, cpu_arch, size(cp))
+                    ηs = partition(ηs, cpu_arch, size(ηp))
+
+                    atol = eps(eltype(grid))
+                    rtol = sqrt(eps(eltype(grid)))
+
+                    @test all(isapprox(up, us; atol, rtol))
+                    @test all(isapprox(vp, vs; atol, rtol))
+                    @test all(isapprox(cp, cs; atol, rtol))
+                    @test all(isapprox(ηp, ηs; atol, rtol))
                 end
-
-                # "s" for "serial" computation, "p" for parallel
-                ms = rotation_with_shear_test(global_grid)
-                mp = rotation_with_shear_test(grid)
-
-                us = interior(on_architecture(CPU(), ms.velocities.u))
-                vs = interior(on_architecture(CPU(), ms.velocities.v))
-                ws = interior(on_architecture(CPU(), ms.velocities.w))
-                cs = interior(on_architecture(CPU(), ms.tracers.c))
-                ηs = interior(on_architecture(CPU(), ms.free_surface.η))
-
-                cpu_arch = cpu_architecture(arch)
-
-                up = interior(on_architecture(cpu_arch, mp.velocities.u))
-                vp = interior(on_architecture(cpu_arch, mp.velocities.v))
-                wp = interior(on_architecture(cpu_arch, mp.velocities.w))
-                cp = interior(on_architecture(cpu_arch, mp.tracers.c))
-                ηp = interior(on_architecture(cpu_arch, mp.free_surface.η))
-
-                us = partition(us, cpu_arch, size(up))
-                vs = partition(vs, cpu_arch, size(vp))
-                ws = partition(ws, cpu_arch, size(wp))
-                cs = partition(cs, cpu_arch, size(cp))
-                ηs = partition(ηs, cpu_arch, size(ηp))
-
-                atol = eps(eltype(grid))
-                rtol = sqrt(eps(eltype(grid)))
-
-                @test all(isapprox(up, us; atol, rtol))
-                @test all(isapprox(vp, vs; atol, rtol))
-                @test all(isapprox(wp, ws; atol, rtol))
-                @test all(isapprox(cp, cs; atol, rtol))
-                @test all(isapprox(ηp, ηs; atol, rtol))
             end
 
             # CATKE works only with synchronized communication at the moment
             arch    = synchronized(arch)
             closure = CATKEVerticalDiffusivity()
 
-            if arch.local_rank == 0
-                @info "  Testing CATKE with $(ranks(arch)) ranks"
-            end
+            @root @info "  Testing CATKE with $(ranks(arch)) ranks"
 
             # "s" for "serial" computation, "p" for parallel
             ms = rotation_with_shear_test(global_underlying_grid, closure)
@@ -163,7 +159,6 @@ for arch in archs
 
             us = interior(on_architecture(CPU(), ms.velocities.u))
             vs = interior(on_architecture(CPU(), ms.velocities.v))
-            ws = interior(on_architecture(CPU(), ms.velocities.w))
             cs = interior(on_architecture(CPU(), ms.tracers.c))
             ηs = interior(on_architecture(CPU(), ms.free_surface.η))
 
@@ -171,13 +166,11 @@ for arch in archs
 
             up = interior(on_architecture(cpu_arch, mp.velocities.u))
             vp = interior(on_architecture(cpu_arch, mp.velocities.v))
-            wp = interior(on_architecture(cpu_arch, mp.velocities.w))
             cp = interior(on_architecture(cpu_arch, mp.tracers.c))
             ηp = interior(on_architecture(cpu_arch, mp.free_surface.η))
 
             us = partition(us, cpu_arch, size(up))
             vs = partition(vs, cpu_arch, size(vp))
-            ws = partition(ws, cpu_arch, size(wp))
             cs = partition(cs, cpu_arch, size(cp))
             ηs = partition(ηs, cpu_arch, size(ηp))
 
@@ -186,7 +179,6 @@ for arch in archs
 
             @test all(isapprox(up, us; atol, rtol))
             @test all(isapprox(vp, vs; atol, rtol))
-            @test all(isapprox(wp, ws; atol, rtol))
             @test all(isapprox(cp, cs; atol, rtol))
             @test all(isapprox(ηp, ηs; atol, rtol))
         end
diff --git a/test/test_dynamics.jl b/test/test_dynamics.jl
index a716c1a9db..2690160b2a 100644
--- a/test/test_dynamics.jl
+++ b/test/test_dynamics.jl
@@ -600,7 +600,9 @@ timesteppers = (:QuasiAdamsBashforth2, :RungeKutta3)
                     @test test_diffusion_cosine(fieldname, NonhydrostaticModel, :QuasiAdamsBashforth2, grid, closure, coord)
 
                     if fieldname != :w && topology(grid)[3] == Bounded                        
+                        @test test_diffusion_cosine(fieldname, HydrostaticFreeSurfaceModel, :SplitRungeKutta2, grid, closure, coord; free_surface=nothing)
                         @test test_diffusion_cosine(fieldname, HydrostaticFreeSurfaceModel, :SplitRungeKutta3, grid, closure, coord; free_surface=nothing)
+                        @test test_diffusion_cosine(fieldname, HydrostaticFreeSurfaceModel, :SplitRungeKutta5, grid, closure, coord; free_surface=nothing)
                         @test test_diffusion_cosine(fieldname, HydrostaticFreeSurfaceModel, :QuasiAdamsBashforth2, grid, closure, coord; free_surface = nothing)
                     end
                 end
@@ -665,11 +667,11 @@ timesteppers = (:QuasiAdamsBashforth2, :RungeKutta3)
         free_surface_types(::Val{:QuasiAdamsBashforth2}, g, grid) = (ImplicitFreeSurface(; gravitational_acceleration=g), 
                                                                      SplitExplicitFreeSurface(grid, ; gravitational_acceleration=g, cfl=0.5))
 
-        free_surface_types(::Val{:SplitRungeKutta3}, g, grid) = (SplitExplicitFreeSurface(grid; gravitational_acceleration=g, cfl=0.5), ) 
+        free_surface_types(split_runge_kutta, g, grid) = (SplitExplicitFreeSurface(grid; gravitational_acceleration=g, cfl=0.5), ) 
 
         @testset "Internal wave with HydrostaticFreeSurfaceModel" begin
             for grid in test_grids
-                for timestepper in (:QuasiAdamsBashforth2, :SplitRungeKutta3)
+                for timestepper in (:QuasiAdamsBashforth2, :SplitRungeKutta2, :SplitRungeKutta3, :SplitRungeKutta5) #timesteppers
                     grid_name = typeof(grid).name.wrapper
                     topo = topology(grid)
 
diff --git a/test/test_hydrostatic_regression.jl b/test/test_hydrostatic_regression.jl
index da6b83acef..a2bdb9d0ff 100644
--- a/test/test_hydrostatic_regression.jl
+++ b/test/test_hydrostatic_regression.jl
@@ -79,7 +79,7 @@ include("regression_tests/hydrostatic_free_turbulence_regression_test.jl")
                                                                    gravitational_acceleration = 1.0,
                                                                    substeps = 5)
 
-            for free_surface in [explicit_free_surface, implicit_free_surface, split_explicit_free_surface]
+            for free_surface in [explicit_free_surface] # implicit_free_surface, split_explicit_free_surface]
 
                 # GPU + ImplicitFreeSurface + precompute metrics cannot be tested on sverdrup at the moment
                 # because "uses too much parameter space (maximum 0x1100 bytes)" error
diff --git a/test/test_immersed_advection.jl b/test/test_immersed_advection.jl
index 53950f32b9..fd54cb8f4a 100644
--- a/test/test_immersed_advection.jl
+++ b/test/test_immersed_advection.jl
@@ -10,7 +10,9 @@ using Oceananigans.Advection:
         _biased_interpolate_xᶠᵃᵃ,
         _biased_interpolate_yᵃᶜᵃ,
         _biased_interpolate_yᵃᶠᵃ,
-        FluxFormAdvection
+        FluxFormAdvection,
+        LeftBias,
+        RightBias
 
 linear_advection_schemes = [Centered, UpwindBiased]
 advection_schemes = [linear_advection_schemes... WENO]
@@ -24,10 +26,10 @@ function run_tracer_interpolation_test(c, ibg, scheme)
         if typeof(scheme) <: Centered
             @test @allowscalar  _symmetric_interpolate_xᶠᵃᵃ(i+1, j, 1, ibg, scheme, c) ≈ 1.0
         else
-            @test @allowscalar _biased_interpolate_xᶠᵃᵃ(i+1, j, 1, ibg, scheme, true,  c) ≈ 1.0
-            @test @allowscalar _biased_interpolate_xᶠᵃᵃ(i+1, j, 1, ibg, scheme, false, c) ≈ 1.0
-            @test @allowscalar _biased_interpolate_yᵃᶠᵃ(i, j+1, 1, ibg, scheme, true,  c) ≈ 1.0
-            @test @allowscalar _biased_interpolate_yᵃᶠᵃ(i, j+1, 1, ibg, scheme, false, c) ≈ 1.0
+            @test @allowscalar _biased_interpolate_xᶠᵃᵃ(i+1, j, 1, ibg, scheme, LeftBias(),  c) ≈ 1.0
+            @test @allowscalar _biased_interpolate_xᶠᵃᵃ(i+1, j, 1, ibg, scheme, RightBias(), c) ≈ 1.0
+            @test @allowscalar _biased_interpolate_yᵃᶠᵃ(i, j+1, 1, ibg, scheme, LeftBias(),  c) ≈ 1.0
+            @test @allowscalar _biased_interpolate_yᵃᶠᵃ(i, j+1, 1, ibg, scheme, RightBias(), c) ≈ 1.0
         end
     end
 end
@@ -77,15 +79,15 @@ function run_momentum_interpolation_test(u, v, ibg, scheme)
             @test @allowscalar  _symmetric_interpolate_yᵃᶜᵃ(i, j+1, 1, ibg, scheme, u) ≈ 1.0
             @test @allowscalar  _symmetric_interpolate_yᵃᶜᵃ(i, j+1, 1, ibg, scheme, v) ≈ 1.0
         else
-            @test @allowscalar _biased_interpolate_xᶜᵃᵃ(i+1, j, 1, ibg, scheme, true,  u) ≈ 1.0
-            @test @allowscalar _biased_interpolate_xᶜᵃᵃ(i+1, j, 1, ibg, scheme, false, u) ≈ 1.0
-            @test @allowscalar _biased_interpolate_yᵃᶜᵃ(i, j+1, 1, ibg, scheme, true,  u) ≈ 1.0
-            @test @allowscalar _biased_interpolate_yᵃᶜᵃ(i, j+1, 1, ibg, scheme, false, u) ≈ 1.0
-
-            @test @allowscalar _biased_interpolate_xᶜᵃᵃ(i+1, j, 1, ibg, scheme, true,  v) ≈ 1.0
-            @test @allowscalar _biased_interpolate_xᶜᵃᵃ(i+1, j, 1, ibg, scheme, false, v) ≈ 1.0
-            @test @allowscalar _biased_interpolate_yᵃᶜᵃ(i, j+1, 1, ibg, scheme, true,  v) ≈ 1.0
-            @test @allowscalar _biased_interpolate_yᵃᶜᵃ(i, j+1, 1, ibg, scheme, false, v) ≈ 1.0
+            @test @allowscalar _biased_interpolate_xᶜᵃᵃ(i+1, j, 1, ibg, scheme, LeftBias(),  u) ≈ 1.0
+            @test @allowscalar _biased_interpolate_xᶜᵃᵃ(i+1, j, 1, ibg, scheme, RightBias(), u) ≈ 1.0
+            @test @allowscalar _biased_interpolate_yᵃᶜᵃ(i, j+1, 1, ibg, scheme, LeftBias(),  u) ≈ 1.0
+            @test @allowscalar _biased_interpolate_yᵃᶜᵃ(i, j+1, 1, ibg, scheme, RightBias(), u) ≈ 1.0
+
+            @test @allowscalar _biased_interpolate_xᶜᵃᵃ(i+1, j, 1, ibg, scheme, LeftBias(),  v) ≈ 1.0
+            @test @allowscalar _biased_interpolate_xᶜᵃᵃ(i+1, j, 1, ibg, scheme, RightBias(), v) ≈ 1.0
+            @test @allowscalar _biased_interpolate_yᵃᶜᵃ(i, j+1, 1, ibg, scheme, LeftBias(),  v) ≈ 1.0
+            @test @allowscalar _biased_interpolate_yᵃᶜᵃ(i, j+1, 1, ibg, scheme, RightBias(), v) ≈ 1.0
         end
     end
 
diff --git a/test/test_immersed_implicit_free_surface.jl b/test/test_immersed_implicit_free_surface.jl
index e642fbec9c..7b4dce34f7 100644
--- a/test/test_immersed_implicit_free_surface.jl
+++ b/test/test_immersed_implicit_free_surface.jl
@@ -6,7 +6,7 @@ using Oceananigans.TurbulenceClosures
 using Oceananigans.Models.HydrostaticFreeSurfaceModels: compute_vertically_integrated_volume_flux!,
                                                         compute_implicit_free_surface_right_hand_side!,
                                                         step_free_surface!,
-                                                        make_pressure_correction!
+                                                        correct_barotropic_mode!
 
 @testset "Immersed boundaries test divergent flow solve with hydrostatic free surface models" begin
     for arch in archs
diff --git a/test/test_implicit_diffusion_diagnostic.jl b/test/test_implicit_diffusion_diagnostic.jl
index c2b8fb36fb..257edcbada 100644
--- a/test/test_implicit_diffusion_diagnostic.jl
+++ b/test/test_implicit_diffusion_diagnostic.jl
@@ -135,7 +135,7 @@ end
     for arch in archs
         schedules = [IterationInterval(1), IterationInterval(10), IterationInterval(100)]
         for schedule in schedules
-            for timestepper in (:QuasiAdamsBashforth2, :SplitRungeKutta3)
+            for timestepper in (:QuasiAdamsBashforth2, :SplitRungeKutta2, :SplitRungeKutta3, :SplitRungeKutta5) #timesteppers
                 @testset "Implicit Diffusion on $schedule schedule and $timestepper, [$(typeof(arch))]" begin
                     @info "  Testing implicit diffusion diagnostic [$(typeof(arch))] with $schedule and $timestepper, in x-direction..."
                     test_implicit_diffusion_diagnostic(arch, :x, timestepper, schedule)
diff --git a/test/test_implicit_free_surface_solver.jl b/test/test_implicit_free_surface_solver.jl
index b6fd5a2128..022931b83b 100644
--- a/test/test_implicit_free_surface_solver.jl
+++ b/test/test_implicit_free_surface_solver.jl
@@ -7,7 +7,6 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels:
     ImplicitFreeSurface,
     FFTImplicitFreeSurfaceSolver,
     PCGImplicitFreeSurfaceSolver,
-    compute_vertically_integrated_lateral_areas!,
     step_free_surface!,
     implicit_free_surface_linear_operation!
 
@@ -67,14 +66,11 @@ function run_implicit_free_surface_solver_tests(arch, grid, free_surface)
     g = Oceananigans.defaults.gravitational_acceleration
     η = model.free_surface.η
 
-    ∫ᶻ_Axᶠᶜᶜ = Field{Face, Center, Nothing}(grid)
-    ∫ᶻ_Ayᶜᶠᶜ = Field{Center, Face, Nothing}(grid)
+    ∫ᶻ_Axᶠᶜᶜ = KernelFunctionOperation{Face, Center, Nothing}(Oceananigans.Models.HydrostaticFreeSurfaceModels.integrated_x_area, grid)
+    ∫ᶻ_Ayᶜᶠᶜ = KernelFunctionOperation{Center, Face, Nothing}(Oceananigans.Models.HydrostaticFreeSurfaceModels.integrated_y_area, grid)
 
     vertically_integrated_lateral_areas = (xᶠᶜᶜ = ∫ᶻ_Axᶠᶜᶜ, yᶜᶠᶜ = ∫ᶻ_Ayᶜᶠᶜ)
 
-    compute_vertically_integrated_lateral_areas!(vertically_integrated_lateral_areas)
-    fill_halo_regions!(vertically_integrated_lateral_areas)
-
     left_hand_side = ZFaceField(grid, indices = (:, :, grid.Nz + 1))
     implicit_free_surface_linear_operation!(left_hand_side, η, ∫ᶻ_Axᶠᶜᶜ, ∫ᶻ_Ayᶜᶠᶜ, g, Δt)
 
diff --git a/test/test_split_explicit_free_surface_solver.jl b/test/test_split_explicit_free_surface_solver.jl
index eb3f7d1263..973d5e6ca3 100644
--- a/test/test_split_explicit_free_surface_solver.jl
+++ b/test/test_split_explicit_free_surface_solver.jl
@@ -9,6 +9,10 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels.SplitExplicitFreeSurfaces
                                                                                   SplitExplicitFreeSurface,
                                                                                   iterate_split_explicit!
 
+@inline noforcing(args...) = 0
+
+clock = Clock(time=0)
+
 @testset "Split-Explicit Dynamics" begin
 
     for FT in float_types
@@ -36,7 +40,7 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels.SplitExplicitFreeSurfaces
             @testset " One timestep test " begin
                 state = sefs.filtered_state
                 U, V  = sefs.barotropic_velocities
-                η̅, U̅, V̅ = state.η, state.U, state.V
+                η̅, U̅, V̅ = state.η̅, state.U̅, state.V̅
 
                 η = sefs.η
                 Δτ = 1.0
@@ -45,9 +49,9 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels.SplitExplicitFreeSurfaces
                 set!(η, η₀)
 
                 Nsubsteps = calculate_substeps(sefs.substepping, 1)
-                fractional_Δt, weights = calculate_adaptive_settings(sefs.substepping, Nsubsteps) # barotropic time step in fraction of baroclinic step and averaging weights
+                fractional_Δt, weights, transport_weights = calculate_adaptive_settings(sefs.substepping, Nsubsteps) # barotropic time step in fraction of baroclinic step and averaging weights
 
-                iterate_split_explicit!(sefs, grid, GU, GV, Δτ, weights, Val(1))
+                iterate_split_explicit!(sefs, grid, GU, GV, Δτ, noforcing, clock, weights, transport_weights, Val(1))
 
                 U_computed = Array(U.data.parent)[2:Nx+1, 2:Ny+1]
                 U_exact = (reshape(-cos.(grid.xᶠᵃᵃ), (length(grid.xᶜᵃᵃ), 1)).+reshape(0 * grid.yᵃᶜᵃ, (1, length(grid.yᵃᶜᵃ))))[2:Nx+1, 2:Ny+1]
@@ -58,7 +62,7 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels.SplitExplicitFreeSurfaces
             @testset "Multi-timestep test " begin
                 state = sefs.filtered_state
                 U, V = sefs.barotropic_velocities
-                η̅, U̅, V̅ = state.η, state.U, state.V
+                η̅, U̅, V̅ = state.η̅, state.U̅, state.V̅
                 η = sefs.η
 
                 T  = 2π
@@ -84,9 +88,9 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels.SplitExplicitFreeSurfaces
                 weights = sefs.substepping.averaging_weights
 
                 for _ in 1:Nt
-                    iterate_split_explicit!(sefs, grid, GU, GV, Δτ, weights, Val(1))
+                    iterate_split_explicit!(sefs, grid, GU, GV, Δτ, noforcing, clock, weights, weights, Val(1))
                 end
-                iterate_split_explicit!(sefs, grid, GU, GV, Δτ_end, weights, Val(1))
+                iterate_split_explicit!(sefs, grid, GU, GV, Δτ, noforcing, clock, weights, weights, Val(1))
 
                 U_computed = Array(deepcopy(interior(U)))
                 η_computed = Array(deepcopy(interior(η)))
@@ -107,7 +111,7 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels.SplitExplicitFreeSurfaces
             @testset "Averaging / Do Nothing test " begin
                 state = sefs.filtered_state
                 U, V  = sefs.barotropic_velocities
-                η̅, U̅, V̅ = state.η, state.U, state.V
+                η̅, U̅, V̅ = state.η̅, state.U̅, state.V̅
                 η = sefs.η
                 g = sefs.gravitational_acceleration
 
@@ -129,10 +133,10 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels.SplitExplicitFreeSurfaces
 
 
                 Nsubsteps  = calculate_substeps(sefs.substepping, 1)
-                fractional_Δt, weights = calculate_adaptive_settings(sefs.substepping, Nsubsteps) # barotropic time step in fraction of baroclinic step and averaging weights
+                fractional_Δt, weights, transport_weights = calculate_adaptive_settings(sefs.substepping, Nsubsteps) # barotropic time step in fraction of baroclinic step and averaging weights
 
                 for step in 1:Nsubsteps
-                    iterate_split_explicit!(sefs, grid, GU, GV, Δτ, weights, Val(1))
+                    iterate_split_explicit!(sefs, grid, GU, GV, Δτ, noforcing, clock, weights, transport_weights, Val(1))
                 end
 
                 U_computed = Array(deepcopy(interior(U)))
@@ -171,7 +175,7 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels.SplitExplicitFreeSurfaces
 
                 state = sefs.filtered_state
                 U, V = sefs.barotropic_velocities
-                η̅, U̅, V̅ = state.η, state.U, state.V
+                η̅, U̅, V̅ = state.η̅, state.U̅, state.V̅
                 η = sefs.η
                 g = sefs.gravitational_acceleration
 
@@ -193,9 +197,9 @@ using Oceananigans.Models.HydrostaticFreeSurfaceModels.SplitExplicitFreeSurfaces
 
                 weights = sefs.substepping.averaging_weights
                 for i in 1:Nt
-                    iterate_split_explicit!(sefs, grid, GU, GV, Δτ, weights, Val(1))
+                    iterate_split_explicit!(sefs, grid, GU, GV, Δτ, noforcing, clock, weights, weights, Val(1))
                 end
-                iterate_split_explicit!(sefs, grid, GU, GV, Δτ_end, weights, Val(1))
+                iterate_split_explicit!(sefs, grid, GU, GV, Δτ, noforcing, clock, weights, weights, Val(1))
 
                 η_mean_after = mean(Array(interior(η)))
 
diff --git a/test/test_tripolar_grid.jl b/test/test_tripolar_grid.jl
index c233932f64..490c91158b 100644
--- a/test/test_tripolar_grid.jl
+++ b/test/test_tripolar_grid.jl
@@ -98,7 +98,7 @@ end
 
         @test Hx == halo_size(grid, 1)
         @test Hy != halo_size(grid, 2)
-        @test Hy == length(free_surface.substepping.averaging_weights) + 1
+        @test Hy == length(free_surface.substepping.averaging_weights) + 2
 
         @test begin
             time_step!(model, 1.0)
diff --git a/test/test_zstar_conservation.jl b/test/test_zstar_conservation.jl
new file mode 100644
index 0000000000..c69a3e1988
--- /dev/null
+++ b/test/test_zstar_conservation.jl
@@ -0,0 +1,178 @@
+include("dependencies_for_runtests.jl")
+
+using Random
+using Oceananigans: initialize!
+using Oceananigans.ImmersedBoundaries: PartialCellBottom
+using Oceananigans.Grids: MutableVerticalDiscretization
+using Oceananigans.Models: ZStarCoordinate, ZCoordinate
+
+grid_type(::RectilinearGrid{F, X, Y}) where {F, X, Y} = "Rect{$X, $Y}"
+grid_type(::LatitudeLongitudeGrid{F, X, Y}) where {F, X, Y} = "LatLon{$X, $Y}"
+
+grid_type(g::ImmersedBoundaryGrid) = "Immersed" * grid_type(g.underlying_grid)
+
+function info_message(grid, free_surface, timestepper)
+    msg1 = "$(typeof(architecture(grid))) "
+    msg2 = grid_type(grid)
+    msg3 = " with $(timestepper)"
+    msg4 = " using a " * string(getnamewrapper(free_surface))
+    return msg1 * msg2 * msg3 * msg4
+end
+
+# QuasiAdamsBashforth2 does not guarantee local conservation of tracers
+test_local_conservation(timestepper) = timestepper != :QuasiAdamsBashforth2
+
+@testset "ZStarCoordinate tracer conservation testset" begin
+    z_stretched = MutableVerticalDiscretization(collect(-20:0))
+    topologies  = ((Periodic, Periodic, Bounded),
+                   (Periodic, Bounded, Bounded),
+                   (Bounded, Periodic, Bounded),
+                   (Bounded, Bounded, Bounded))
+
+    for arch in archs
+        for topology in topologies
+            Random.seed!(1234)
+
+            rtgv = RectilinearGrid(arch; size = (10, 10, 20), x = (0, 100kilometers), y = (-10kilometers, 10kilometers), topology, z = z_stretched)
+            irtgv = ImmersedBoundaryGrid(deepcopy(rtgv),  GridFittedBottom((x, y) -> rand() - 10))
+            prtgv = ImmersedBoundaryGrid(deepcopy(rtgv), PartialCellBottom((x, y) -> rand() - 10))
+
+            if topology[2] == Bounded
+                llgv = LatitudeLongitudeGrid(arch; size = (10, 10, 20), latitude = (0, 1), longitude = (0, 1), topology, z = z_stretched)
+
+                illgv = ImmersedBoundaryGrid(deepcopy(llgv),  GridFittedBottom((x, y) -> rand() - 10))
+                pllgv = ImmersedBoundaryGrid(deepcopy(llgv), PartialCellBottom((x, y) -> rand() - 10))
+
+                # TODO: Partial cell bottom are broken at the moment and do not account for the Δz in the volumes
+                # and vertical areas (see https://github.com/CliMA/Oceananigans.jl/issues/3958)
+                # When this is issue is fixed we can add the partial cells to the testing.
+                grids = [llgv, rtgv, illgv, irtgv] # , pllgv, prtgv]
+            else
+                grids = [rtgv, irtgv] #, prtgv]
+            end
+
+            for grid in grids
+                split_free_surface    = SplitExplicitFreeSurface(grid; substeps=100)
+                implicit_free_surface = ImplicitFreeSurface(solver_method=:PreconditionedConjugateGradient)
+                explicit_free_surface = ExplicitFreeSurface()
+
+                for free_surface in [split_free_surface, explicit_free_surface]
+
+                    # TODO: There are parameter space issues with ImplicitFreeSurface and a immersed LatitudeLongitudeGrid
+                    # For the moment we are skipping these tests.
+                    if (arch isa GPU) &&
+                       (free_surface isa ImplicitFreeSurface) &&
+                       (grid isa ImmersedBoundaryGrid) &&
+                       (grid.underlying_grid isa LatitudeLongitudeGrid)
+
+                        @info "  Skipping $(info_message(grid, free_surface)) because of parameter space issues"
+                        continue
+                    end
+
+                    for timestepper in (:QuasiAdamsBashforth2, :SplitRungeKutta3)
+
+                        info_msg = info_message(grid, free_surface, timestepper)
+                        if timestepper == :QuasiAdamsBashforth2
+                            @info "  Skipping local conservation test for QuasiAdamsBashforth2 time stepping, which does not guarantee conservation of tracers."
+                        else
+                            @testset "$info_msg" begin
+                                @info "  Testing a $info_msg"
+                                model = HydrostaticFreeSurfaceModel(; grid = deepcopy(grid),
+                                                                    free_surface,
+                                                                    tracers = (:b, :c, :constant),
+                                                                    timestepper,
+                                                                    buoyancy = BuoyancyTracer(),
+                                                                    vertical_coordinate = ZStarCoordinate())
+
+                                bᵢ(x, y, z) = x < grid.Lx / 2 ? 0.06 : 0.01
+
+                                set!(model, c = (x, y, z) -> rand(), b = bᵢ, constant = 1)
+
+                                Δt = free_surface isa ExplicitFreeSurface ? 10 : 2minutes
+                                test_zstar_coordinate(model, 100, Δt, test_local_conservation(timestepper))
+                            end
+                        end
+                    end
+                end
+            end
+        end
+        
+        @testset "RungeKutta5 ZStarCoordinate tracer conservation tests" begin
+            @info "  Testing a ZStarCoordinate and Runge-Kutta 5th order time stepping"
+
+            topology = topologies[2]
+            rtg  = RectilinearGrid(arch; size=(10, 10, 20), x=(0, 100kilometers), y=(-10kilometers, 10kilometers), topology, z=z_stretched)
+            llg  = LatitudeLongitudeGrid(arch; size=(10, 10, 20), latitude=(0, 1), longitude=(0, 1), topology, z=z_stretched)
+            irtg = ImmersedBoundaryGrid(deepcopy(rtg), GridFittedBottom((x, y) -> rand()-10))
+            illg = ImmersedBoundaryGrid(deepcopy(llg), GridFittedBottom((x, y) -> rand()-10))
+
+            for grid in [rtg, llg] # , irtg, illg]
+                split_free_surface = SplitExplicitFreeSurface(grid; substeps=50)
+                model = HydrostaticFreeSurfaceModel(; grid,
+                                                    free_surface = split_free_surface,
+                                                    tracers = (:b, :c, :constant),
+                                                    timestepper = :SplitRungeKutta5,
+                                                    buoyancy = BuoyancyTracer(),
+                                                    vertical_coordinate = ZStarCoordinate())
+
+                bᵢ(x, y, z) = x < grid.Lx / 2 ? 0.06 : 0.01
+
+                set!(model, c = (x, y, z) -> rand(), b = bᵢ, constant = 1)
+
+                Δt = 2minutes
+                test_zstar_coordinate(model, 100, Δt)
+            end
+        end
+
+        @testset "TripolarGrid ZStarCoordinate tracer conservation tests" begin
+            @info "Testing a ZStarCoordinate coordinate with a Tripolar grid on $(arch)..."
+
+            grid = TripolarGrid(arch; size = (20, 20, 20), z = z_stretched)
+
+            # Code credit:
+            # https://github.com/PRONTOLab/GB-25/blob/682106b8487f94da24a64d93e86d34d560f33ffc/src/model_utils.jl#L65
+            function mtn₁(λ, φ)
+                λ₁ = 70
+                φ₁ = 55
+                dφ = 5
+                return exp(-((λ - λ₁)^2 + (φ - φ₁)^2) / 2dφ^2)
+            end
+
+            function mtn₂(λ, φ)
+                λ₁ = 70
+                λ₂ = λ₁ + 180
+                φ₂ = 55
+                dφ = 5
+                return exp(-((λ - λ₂)^2 + (φ - φ₂)^2) / 2dφ^2)
+            end
+
+            zb = - 20
+            h  = - zb + 10
+            gaussian_islands(λ, φ) = zb + h * (mtn₁(λ, φ) + mtn₂(λ, φ))
+
+            grid = ImmersedBoundaryGrid(grid, GridFittedBottom(gaussian_islands))
+            free_surface = SplitExplicitFreeSurface(grid; substeps=10)
+
+            model = HydrostaticFreeSurfaceModel(; grid,
+                                                  free_surface,
+                                                  tracers = (:b, :c, :constant),
+                                                  buoyancy = BuoyancyTracer(),
+                                                  timestepper = :SplitRungeKutta3,
+                                                  vertical_coordinate = ZStarCoordinate())
+
+            bᵢ(x, y, z) = y < 0 ? 0.06 : 0.01
+
+            # Instead of initializing with random velocities, infer them from a random initial streamfunction
+            # to ensure the velocity field is divergence-free at initialization.
+            ψ = Field{Center, Center, Center}(grid)
+            set!(ψ, rand(size(ψ)...))
+            uᵢ = ∂y(ψ)
+            vᵢ = -∂x(ψ)
+
+            set!(model, c = (x, y, z) -> rand(), u = uᵢ, v = vᵢ, b = bᵢ, constant = 1)
+
+            Δt = 2minutes
+            test_zstar_coordinate(model, 300, Δt)
+        end
+    end
+end
diff --git a/test/test_zstar_coordinate.jl b/test/test_zstar_coordinate.jl
index 4d611df329..cc0a68bc3f 100644
--- a/test/test_zstar_coordinate.jl
+++ b/test/test_zstar_coordinate.jl
@@ -6,7 +6,7 @@ using Oceananigans.ImmersedBoundaries: PartialCellBottom
 using Oceananigans.Grids: MutableVerticalDiscretization
 using Oceananigans.Models: ZStarCoordinate, ZCoordinate
 
-function test_zstar_coordinate(model, Ni, Δt)
+function test_zstar_coordinate(model, Ni, Δt, test_local_conservation=true)
 
     bᵢ = deepcopy(model.tracers.b)
     cᵢ = deepcopy(model.tracers.c)
@@ -28,44 +28,33 @@ function test_zstar_coordinate(model, Ni, Δt)
         compute!(∫c)
 
         condition = interior(∫b, 1, 1, 1) ≈ interior(∫bᵢ, 1, 1, 1)
-        @test condition
         if !condition
-                @info "Stopping early: buoyancy not conserved at step $step"
-            break
+            @info "Stopping early: buoyancy not conserved at step $step"
         end
+        @test condition
 
         condition = interior(∫c, 1, 1, 1) ≈ interior(∫cᵢ, 1, 1, 1)
-        @test condition
         if !condition
-                @info "Stopping early: c tracer not conserved at step $step"
-            break
+            @info "Stopping early: c tracer not conserved at step $step"
         end
+        @test condition
 
         condition = maximum(abs, interior(w, :, :, Nz+1)) < eps(eltype(w))
-        @test condition
         if !condition
-                @info "Stopping early: nonzero vertical velocity at top at step $step"
-            break
+            @info "Stopping early: nonzero vertical velocity at top at step $step"
         end
+        @test condition
 
         # Constancy preservation test
-        @test maximum(model.tracers.constant) ≈ 1
-        @test minimum(model.tracers.constant) ≈ 1
+        if test_local_conservation
+            @test maximum(model.tracers.constant) ≈ 1
+            @test minimum(model.tracers.constant) ≈ 1
+        end
     end
 
     return nothing
 end
 
-function info_message(grid, free_surface)
-    msg1 = "$(typeof(architecture(grid))) "
-    msg2 = string(getnamewrapper(grid))
-    msg3 = grid isa ImmersedBoundaryGrid ? " on a " * string(getnamewrapper(grid.underlying_grid)) : ""
-    msg4 = grid.z.Δᵃᵃᶠ isa Number ? " with uniform spacing" : " with stretched spacing"
-    msg5 = grid isa ImmersedBoundaryGrid ? " and $(string(getnamewrapper(grid.immersed_boundary))) immersed boundary" : ""
-    msg6 = " using a " * string(getnamewrapper(free_surface))
-    return msg1 * msg2 * msg3 * msg4 * msg5 * msg6
-end
-
 const C = Center
 const F = Face
 
@@ -125,8 +114,8 @@ end
     for arch in archs
         c₀ = rand(15)
 
-        grid_static = RectilinearGrid(arch; size=15, z=z_static, topology=(Flat, Flat, Bounded))
-        grid_moving = RectilinearGrid(arch; size=15, z=z_moving, topology=(Flat, Flat, Bounded))
+        grid_static = RectilinearGrid(arch; size=(1, 1, 15), x=(0, 1), y=(0, 1), z=z_static, topology=(Periodic, Periodic, Bounded))
+        grid_moving = RectilinearGrid(arch; size=(1, 1, 15), x=(0, 1), y=(0, 1), z=z_moving, topology=(Periodic, Periodic, Bounded))
 
         fill!(grid_moving.z.ηⁿ,   5)
         fill!(grid_moving.z.σᶜᶜ⁻, 1.5)
@@ -136,8 +125,8 @@ end
         fill!(grid_moving.z.σᶠᶜⁿ, 1.5)
 
         for TD in (ExplicitTimeDiscretization, VerticallyImplicitTimeDiscretization)
-            for timestepper in (:QuasiAdamsBashforth2, :SplitRungeKutta3)
-                for c_bcs in (NoFluxBoundaryCondition(), FluxBoundaryCondition(0.01), ValueBoundaryCondition(0.01))
+            for timestepper in (:QuasiAdamsBashforth2, :SplitRungeKutta3, :SplitRungeKutta5) #timesteppers
+                for c_bcs in (FluxBoundaryCondition(nothing), FluxBoundaryCondition(0.01), ValueBoundaryCondition(0.01))
                     @info "testing ZStarCoordinate diffusion on $(typeof(arch)) with $TD, $timestepper, and $c_bcs at the top"
 
                     model_static = HydrostaticFreeSurfaceModel(; grid = grid_static,
@@ -152,8 +141,8 @@ end
                                                                 boundary_conditions = (; c = FieldBoundaryConditions(top=c_bcs)),
                                                                 closure = VerticalScalarDiffusivity(TD(), κ=0.1))
 
-                    set!(model_static, c = c₀)
-                    set!(model_moving, c = c₀)
+                    set!(model_static, c=c₀)
+                    set!(model_moving, c=c₀, η=5)
 
                     for _ in 1:1000
                         time_step!(model_static, 1.0)
@@ -166,147 +155,3 @@ end
         end
     end
 end
-
-@testset "ZStarCoordinate tracer conservation testset" begin
-    z_stretched = MutableVerticalDiscretization(collect(-20:0))
-    topologies  = ((Periodic, Periodic, Bounded),
-                   (Periodic, Bounded, Bounded),
-                   (Bounded, Periodic, Bounded),
-                   (Bounded, Bounded, Bounded))
-
-    for arch in archs
-        for topology in topologies
-            Random.seed!(1234)
-
-            rtgv = RectilinearGrid(arch; size = (10, 10, 20), x = (0, 100kilometers), y = (-10kilometers, 10kilometers), topology, z = z_stretched)
-            irtgv = ImmersedBoundaryGrid(deepcopy(rtgv),  GridFittedBottom((x, y) -> rand() - 10))
-            prtgv = ImmersedBoundaryGrid(deepcopy(rtgv), PartialCellBottom((x, y) -> rand() - 10))
-
-            if topology[2] == Bounded
-                llgv = LatitudeLongitudeGrid(arch; size = (10, 10, 20), latitude = (0, 1), longitude = (0, 1), topology, z = z_stretched)
-
-                illgv = ImmersedBoundaryGrid(deepcopy(llgv),  GridFittedBottom((x, y) -> rand() - 10))
-                pllgv = ImmersedBoundaryGrid(deepcopy(llgv), PartialCellBottom((x, y) -> rand() - 10))
-
-                # TODO: Partial cell bottom are broken at the moment and do not account for the Δz in the volumes
-                # and vertical areas (see https://github.com/CliMA/Oceananigans.jl/issues/3958)
-                # When this is issue is fixed we can add the partial cells to the testing.
-                grids = [llgv, rtgv, illgv, irtgv] # , pllgv, prtgv]
-            else
-                grids = [rtgv, irtgv] #, prtgv]
-            end
-
-            for grid in grids
-                split_free_surface    = SplitExplicitFreeSurface(grid; cfl = 0.75)
-                implicit_free_surface = ImplicitFreeSurface()
-                explicit_free_surface = ExplicitFreeSurface()
-
-                for free_surface in [split_free_surface, implicit_free_surface, explicit_free_surface]
-
-                    # TODO: There are parameter space issues with ImplicitFreeSurface and a immersed LatitudeLongitudeGrid
-                    # For the moment we are skipping these tests.
-                    if (arch isa GPU) &&
-                       (free_surface isa ImplicitFreeSurface) &&
-                       (grid isa ImmersedBoundaryGrid) &&
-                       (grid.underlying_grid isa LatitudeLongitudeGrid)
-
-                        @info "  Skipping $(info_message(grid, free_surface)) because of parameter space issues"
-                        continue
-                    end
-
-                    info_msg = info_message(grid, free_surface)
-                    @testset "$info_msg" begin
-                        @info "  Testing a $info_msg"
-                        model = HydrostaticFreeSurfaceModel(; grid,
-                                                            free_surface,
-                                                            tracers = (:b, :c, :constant),
-                            				                buoyancy = BuoyancyTracer(),
-                                                            vertical_coordinate = ZStarCoordinate(grid))
-
-                        bᵢ(x, y, z) = x < grid.Lx / 2 ? 0.06 : 0.01
-
-                        set!(model, c = (x, y, z) -> rand(), b = bᵢ, constant = 1)
-
-                        Δt = free_surface isa ExplicitFreeSurface ? 10 : 2minutes
-                        test_zstar_coordinate(model, 100, Δt)
-                    end
-                end
-            end
-        end
-
-        @info "  Testing a ZStarCoordinate and Runge-Kutta 3rd order time stepping"
-
-        topology = topologies[2]
-        rtg  = RectilinearGrid(arch; size=(10, 10, 20), x=(0, 100kilometers), y=(-10kilometers, 10kilometers), topology, z=z_stretched)
-        llg  = LatitudeLongitudeGrid(arch; size=(10, 10, 20), latitude=(0, 1), longitude=(0, 1), topology, z=z_stretched)
-        irtg = ImmersedBoundaryGrid(deepcopy(rtg), GridFittedBottom((x, y) -> rand()-10))
-        illg = ImmersedBoundaryGrid(deepcopy(llg), GridFittedBottom((x, y) -> rand()-10))
-
-        for grid in [rtg, llg, irtg, illg]
-            split_free_surface = SplitExplicitFreeSurface(grid; substeps=50)
-            model = HydrostaticFreeSurfaceModel(; grid,
-                                                free_surface = split_free_surface,
-                                                tracers = (:b, :c, :constant),
-                                                timestepper = :SplitRungeKutta3,
-                                                buoyancy = BuoyancyTracer(),
-                                                vertical_coordinate = ZStarCoordinate(grid))
-
-            bᵢ(x, y, z) = x < grid.Lx / 2 ? 0.06 : 0.01
-
-            set!(model, c = (x, y, z) -> rand(), b = bᵢ, constant = 1)
-
-            Δt = 2minutes
-            test_zstar_coordinate(model, 100, Δt)
-        end
-
-        @testset "TripolarGrid ZStarCoordinate tracer conservation tests" begin
-            @info "Testing a ZStarCoordinate coordinate with a Tripolar grid on $(arch)..."
-
-            grid = TripolarGrid(arch; size = (20, 20, 20), z = z_stretched)
-
-            # Code credit:
-            # https://github.com/PRONTOLab/GB-25/blob/682106b8487f94da24a64d93e86d34d560f33ffc/src/model_utils.jl#L65
-            function mtn₁(λ, φ)
-                λ₁ = 70
-                φ₁ = 55
-                dφ = 5
-                return exp(-((λ - λ₁)^2 + (φ - φ₁)^2) / 2dφ^2)
-            end
-
-            function mtn₂(λ, φ)
-                λ₁ = 70
-                λ₂ = λ₁ + 180
-                φ₂ = 55
-                dφ = 5
-                return exp(-((λ - λ₂)^2 + (φ - φ₂)^2) / 2dφ^2)
-            end
-
-            zb = - 20
-            h  = - zb + 10
-            gaussian_islands(λ, φ) = zb + h * (mtn₁(λ, φ) + mtn₂(λ, φ))
-
-            grid = ImmersedBoundaryGrid(grid, GridFittedBottom(gaussian_islands))
-            free_surface = SplitExplicitFreeSurface(grid; substeps=10)
-
-            model = HydrostaticFreeSurfaceModel(; grid,
-                                                  free_surface,
-                                                  tracers = (:b, :c, :constant),
-                                                  buoyancy = BuoyancyTracer(),
-                                                  vertical_coordinate = ZStarCoordinate(grid))
-
-            bᵢ(x, y, z) = y < 0 ? 0.06 : 0.01
-
-            # Instead of initializing with random velocities, infer them from a random initial streamfunction
-            # to ensure the velocity field is divergence-free at initialization.
-            ψ = Field{Center, Center, Center}(grid)
-            set!(ψ, rand(size(ψ)...))
-            uᵢ = ∂y(ψ)
-            vᵢ = -∂x(ψ)
-
-            set!(model, c = (x, y, z) -> rand(), u = uᵢ, v = vᵢ, b = bᵢ, constant = 1)
-
-            Δt = 2minutes
-            test_zstar_coordinate(model, 300, Δt)
-        end
-    end
-end
diff --git a/test/utils_for_runtests.jl b/test/utils_for_runtests.jl
index 4037757f12..6f1284d34e 100644
--- a/test/utils_for_runtests.jl
+++ b/test/utils_for_runtests.jl
@@ -43,12 +43,12 @@ function test_architectures()
     # `Partition(x = 2, y = 2)`, and different fractional subdivisions in x, y and xy
     if mpi_test
         if MPI.Initialized() && MPI.Comm_size(MPI.COMM_WORLD) == 4
-            return (Distributed(child_arch; partition=Partition(4)),
-                    Distributed(child_arch; partition=Partition(1, 4)),
-                    Distributed(child_arch; partition=Partition(2, 2)),
-                    Distributed(child_arch; partition=Partition(x = Fractional(1, 2, 3, 4))),
-                    Distributed(child_arch; partition=Partition(y = Fractional(1, 2, 3, 4))),
-                    Distributed(child_arch; partition=Partition(x = Fractional(1, 2), y = Equal())))
+            return (Distributed(child_arch; synchronized_communication=true, partition=Partition(4)),
+                    Distributed(child_arch; synchronized_communication=true, partition=Partition(1, 4)),
+                    Distributed(child_arch; synchronized_communication=true, partition=Partition(2, 2)),
+                    Distributed(child_arch; synchronized_communication=true, partition=Partition(x = Fractional(1, 2, 3, 4))),
+                    Distributed(child_arch; synchronized_communication=true, partition=Partition(y = Fractional(1, 2, 3, 4))),
+                    Distributed(child_arch; synchronized_communication=true, partition=Partition(x = Fractional(1, 2), y = Equal())))
         else
             return throw("The MPI partitioning is not correctly configured.")
         end
diff --git a/validation/advection/one_dimensional_advection.jl b/validation/advection/one_dimensional_advection.jl
index f194da3b46..c784d9634e 100644
--- a/validation/advection/one_dimensional_advection.jl
+++ b/validation/advection/one_dimensional_advection.jl
@@ -1,4 +1,5 @@
 using Oceananigans
+using Oceananigans.TimeSteppers: SplitRungeKuttaTimeStepper
 
 N = 200
 
@@ -29,7 +30,7 @@ a = 0.5
     end
 end
 
-Δt_max = 0.5 * minimum_xspacing(grid)
+Δt_max = 0.6 * minimum_xspacing(grid)
 c_real = CenterField(grid)
 set!(c_real, c₀)
 
@@ -44,21 +45,34 @@ model2 = HydrostaticFreeSurfaceModel(; grid, velocities=PrescribedVelocityFields
 set!(model2, c=c₀)
 sim2 = Simulation(model2, Δt=Δt_max, stop_time=10)
 
+timestepper = SplitRungeKuttaTimeStepper(coefficients = [4, 3, 2, 1])
+
+model3 = HydrostaticFreeSurfaceModel(; grid, velocities=PrescribedVelocityFields(u=1), timestepper=timestepper, tracer_advection=advection, tracers=:c)
+set!(model3, c=c₀)
+sim3 = Simulation(model3, Δt=Δt_max, stop_time=10)
+
 sim1.output_writers[:solution] = JLD2Writer(model1, (; c = model1.tracers.c);
-                                            filename="one_d_simulation_NH.jld2",
+                                            filename="one_d_simulation_1.jld2",
                                             schedule=IterationInterval(10),
                                             overwrite_existing=true)
 
 sim2.output_writers[:solution] = JLD2Writer(model2, (; c = model2.tracers.c);
-                                            filename="one_d_simulation_HF.jld2",
+                                            filename="one_d_simulation_2.jld2",
+                                            schedule=IterationInterval(10),
+                                            overwrite_existing=true)
+
+sim3.output_writers[:solution] = JLD2Writer(model3, (; c = model3.tracers.c);
+                                            filename="one_d_simulation_3.jld2",
                                             schedule=IterationInterval(10),
                                             overwrite_existing=true)
 
 run!(sim1)
 run!(sim2)
+run!(sim3)
 
-c1 = FieldTimeSeries("one_d_simulation_NH.jld2", "c")
-c2 = FieldTimeSeries("one_d_simulation_HF.jld2", "c")
+c1 = FieldTimeSeries("one_d_simulation_1.jld2", "c")
+c2 = FieldTimeSeries("one_d_simulation_2.jld2", "c")
+c3 = FieldTimeSeries("one_d_simulation_3.jld2", "c")
 
 using GLMakie
 
@@ -69,8 +83,10 @@ fig = Figure()
 ax = Axis(fig[1, 1], title="c", xlabel="x", ylabel="t")
 c1i = @lift(interior(c1[$iter], :, 1, 1))
 c2i = @lift(interior(c2[$iter], :, 1, 1))
-lines!(ax, c1i, color=:blue, label = "RK3  (NonhydrostaticModel)")
-lines!(ax, c2i, color=:red,  label = "SRK3 (HydrostaticFreeSurfaceModel)")
+c3i = @lift(interior(c3[$iter], :, 1, 1))
+lines!(ax, c1i, color=:blue,  label = "RK3  (NonhydrostaticModel)")
+lines!(ax, c2i, color=:red,   label = "SRK3 (HydrostaticFreeSurfaceModel)")
+lines!(ax, c3i, color=:green, label = "Custom SRK3 (HydrostaticFreeSurfaceModel)")
 Legend(fig[0, 1], ax)
 
 GLMakie.record(fig, "one_d_simulation.mp4", 1:Nt) do i
diff --git a/validation/distributed_simulations/distributed_hydrostatic_turbulence.jl b/validation/distributed_simulations/distributed_hydrostatic_turbulence.jl
index 5749cecd8f..33ad3ac7be 100644
--- a/validation/distributed_simulations/distributed_hydrostatic_turbulence.jl
+++ b/validation/distributed_simulations/distributed_hydrostatic_turbulence.jl
@@ -1,12 +1,15 @@
 using Oceananigans
 using MPI
-using Oceananigans.Models.HydrostaticFreeSurfaceModels: VerticalVorticityField
 using Printf
 using Statistics
 using Oceananigans.BoundaryConditions
 using Oceananigans.DistributedComputations
+using Oceananigans.Grids
 using Random
 using JLD2
+using GLMakie
+using MPI
+MPI.Init()
 
 # Run with
 #
@@ -14,104 +17,100 @@ using JLD2
 #   mpiexec -n 4 julia --project distributed_hydrostatic_turbulence.jl
 # ```
 
-function run_simulation(nx, ny, arch; topology = (Periodic, Periodic, Bounded))
-    grid = RectilinearGrid(arch; topology, size = (Nx, Ny, 10), extent=(4π, 4π, 0.5), halo=(8, 8, 8))
+Nx = 64
+Ny = 64
 
-    bottom(x, y) = (x > π && x < 3π/2 && y > π/2 && y < 3π/2) ? 1.0 : - grid.Lz - 1.0
-    grid = ImmersedBoundaryGrid(grid, GridFittedBottom(bottom); active_cells_map = true)
+ranks = 2
+arch  = CPU()
 
-    model = HydrostaticFreeSurfaceModel(; grid,
-                                        momentum_advection = VectorInvariant(vorticity_scheme=WENO(order=9)),
-                                        free_surface = SplitExplicitFreeSurface(grid, substeps=10),
-                                        tracer_advection = WENO(),
-                                        buoyancy = nothing,
-                                        coriolis = FPlane(f = 1),
-                                        tracers = :c)
+grid = RectilinearGrid(arch; 
+                        topology = (Periodic, Periodic, Bounded), 
+                        size=(Nx, Ny, 1), 
+                        x = (0, 4π),
+                        y = (0, 4π),
+                        z = MutableVerticalDiscretization((-0.5, 0)), # (-0.5, 0), # 
+                        halo=(8, 8, 8))
 
-    # Scale seed with rank to avoid symmetry
-    local_rank = MPI.Comm_rank(arch.communicator)
-    Random.seed!(1234 * (local_rank + 1))
+# bottom(x, y) = (x > π && x < 3π/2 && y > π/2 && y < 3π/2) ? 1.0 : - grid.Lz - 1.0
+# grid = ImmersedBoundaryGrid(grid, GridFittedBottom(bottom); active_cells_map = true)
 
-    set!(model, u = (x, y, z) -> 1-2rand(), v = (x, y, z) -> 1-2rand())
+model = HydrostaticFreeSurfaceModel(; grid,
+                                    momentum_advection = WENOVectorInvariant(),
+                                    free_surface = ImplicitFreeSurface(), # solver_method=:PreconditionedConjugateGradient),
+                                    tracer_advection = WENO(),
+                                    timestepper = :SplitRungeKutta3,
+                                    buoyancy = nothing,
+                                    coriolis = FPlane(f = 1),
+                                    tracers = (:c, :constant))
 
-    mask(x, y, z) = x > 3π/2 && x < 5π/2 && y > 3π/2 && y < 5π/2
-    c = model.tracers.c
+# Scale seed with rank to avoid symmetry
+local_rank = MPI.Comm_rank(MPI.COMM_WORLD)
+Random.seed!(1234 * (local_rank + 1))
+mask(x, y, z) = x > 3π/2 && x < 5π/2 && y > 3π/2 && y < 5π/2
 
-    set!(c, mask)
+set!(model, u=(x, y, z)->1-2rand(), v=(x, y, z)->1-2rand(), c=mask, constant=1)
 
-    u, v, _ = model.velocities
-    # ζ = VerticalVorticityField(model)
-    η = model.free_surface.η
-    outputs = merge(model.velocities, model.tracers)
+# U, V = model.free_surface.barotropic_velocities
+# set!(U, model.velocities.u * 0.5)
+# set!(V, model.velocities.v * 0.5)
 
-    progress(sim) = @info "Iteration: $(sim.model.clock.iteration), time: $(sim.model.clock.time), Δt: $(sim.Δt)"
-    simulation = Simulation(model, Δt=0.02, stop_time=100.0)
+u, v, _ = model.velocities
+η = model.free_surface.η
+outputs = merge(model.velocities, model.tracers) #, (η=η, U=U, V=V))
 
-    wizard = TimeStepWizard(cfl = 0.2, max_change = 1.1)
+progress(sim) = @info "Iteration: $(sim.model.clock.iteration), time: $(sim.model.clock.time), Δt: $(sim.Δt), extrema c: $(extrema(model.tracers.constant) .- 1)"
+simulation = Simulation(model, Δt=0.2, stop_time=100.0)
 
-    simulation.callbacks[:progress] = Callback(progress, IterationInterval(100))
-    simulation.callbacks[:wizard]   = Callback(wizard,   IterationInterval(10))
+wizard = TimeStepWizard(cfl = 0.2, max_change = 1.1)
 
-    filepath = "mpi_hydrostatic_turbulence_rank$(local_rank)"
-    simulation.output_writers[:fields] =
-        JLD2Writer(model, outputs, filename=filepath, schedule=TimeInterval(0.1),
-                   overwrite_existing=true)
+simulation.callbacks[:progress] = Callback(progress, IterationInterval(100))
+simulation.callbacks[:wizard]   = Callback(wizard,   IterationInterval(10))
 
-    run!(simulation)
+filepath = "mpi_hydrostatic_turbulence"
+simulation.output_writers[:fields] =
+    JLD2Writer(model, outputs, filename=filepath, schedule=TimeInterval(0.1),
+                overwrite_existing=true)
 
-    MPI.Barrier(arch.communicator)
-end
+run!(simulation)
 
-Nx = 32
-Ny = 32
+# Visualize the plane
+# Produce a video for variable `var`
+function visualize_simulation(var)
+    iter = Observable(1)
 
-arch = Distributed(CPU(), partition = Partition(2, 2))
+    v = Vector(undef, ranks)
+    V = Vector(undef, ranks)
+    x = Vector(undef, ranks)
+    y = Vector(undef, ranks)
 
-# Run the simulation
-run_simulation(Nx, Ny, arch)
+    for r in 1:ranks
+        v[r] = FieldTimeSeries("mpi_hydrostatic_turbulence.jld2", var)
+        nx, ny, _ = size(v[r])
+        V[r] = @lift(interior(v[r][$iter], 1:nx, 1:ny, 1))
 
-# Visualize the plane
-# Produce a video for variable `var`
-try
-    using GLMakie
-
-    function visualize_simulation(var)
-        iter = Observable(1)
-
-        v = Vector(undef, 4)
-        V = Vector(undef, 4)
-        x = Vector(undef, 4)
-        y = Vector(undef, 4)
-
-        for r in 1:4
-            v[r] = FieldTimeSeries("mpi_hydrostatic_turbulence_rank$(r-1).jld2", var)
-            nx, ny, _ = size(v[r])
-            V[r] = @lift(interior(v[r][$iter], 1:nx, 1:ny, 1))
-
-            x[r] = xnodes(v[r])
-            y[r] = ynodes(v[r])
-        end
-
-        fig = Figure()
-        ax = Axis(fig[1, 1])
-        for r in 1:4
-            heatmap!(ax, x[r], y[r], V[r], colorrange = (-1.0, 1.0))
-        end
-
-        GLMakie.record(fig, "hydrostatic_test_" * var * ".mp4", 1:length(v[1].times), framerate = 11) do i
-            @info "step $i";
-            iter[] = i;
-        end
+        x[r] = xnodes(v[r])
+        y[r] = ynodes(v[r])
+    end
+
+    fig = Figure()
+    ax = Axis(fig[1, 1])
+    for r in 1:ranks
+        heatmap!(ax, x[r], y[r], V[r])
     end
 
-    if MPI.Comm_rank(MPI.COMM_WORLD) == 0
-        visualize_simulation("u")
-        visualize_simulation("v")
-        visualize_simulation("c")
+    GLMakie.record(fig, "hydrostatic_test_" * var * ".mp4", 1:length(v[1].times), framerate = 11) do i
+        @info "step $i";
+        iter[] = i;
     end
-catch err
-    @info err
 end
 
-MPI.Barrier(arch.communicator)
+if MPI.Comm_rank(MPI.COMM_WORLD) == 0
+    visualize_simulation("u")
+    visualize_simulation("v")
+    visualize_simulation("c")
+    # visualize_simulation("U")
+    # visualize_simulation("V")
+    visualize_simulation("constant")
+end
 
+MPI.Barrier(MPI.COMM_WORLD)
diff --git a/validation/implicit_dissipation/variance_dissipation.jl b/validation/implicit_dissipation/variance_dissipation.jl
index 28d7b17887..162be10054 100644
--- a/validation/implicit_dissipation/variance_dissipation.jl
+++ b/validation/implicit_dissipation/variance_dissipation.jl
@@ -1,5 +1,6 @@
 using Oceananigans
 using Oceananigans.Models.VarianceDissipationComputations
+using Oceananigans.TimeSteppers: SplitRungeKuttaTimeStepper, QuasiAdamsBashforth2TimeStepper
 using KernelAbstractions: @kernel, @index
 using GLMakie
 
@@ -51,86 +52,111 @@ function compute_tracer_dissipation!(sim)
     return nothing
 end
 
-tracer_advection = WENO(order=5)
-closure = ScalarDiffusivity(κ=1e-5)
-velocities = PrescribedVelocityFields(u=1)
-
-c⁻    = CenterField(grid)
-Δtc²  = CenterField(grid)
+cᵢ = CenterField(grid)
+set!(cᵢ, c₀)
 
-for (ts, timestepper) in zip((:AB2, :RK3), (:QuasiAdamsBashforth2, :SplitRungeKutta3))
+tracer_advection = WENO(order=7)
+closure = nothing # ScalarDiffusivity(κ=1e-5)
+velocities = PrescribedVelocityFields(u=1)
     
+function run_simulation(ts, timestepper; χ=nothing, velocities=velocities)   
+    c⁻    = CenterField(grid)
+    Δtc²  = CenterField(grid)     
+
     model = HydrostaticFreeSurfaceModel(; grid, 
-                                        timestepper, 
-                                        velocities, 
-                                        tracer_advection, 
-                                        closure, 
-                                        tracers=:c,
-                                        auxiliary_fields=(; Δtc², c⁻))
-                                            
+                                          timestepper, 
+                                          velocities, 
+                                          tracer_advection, 
+                                          closure, 
+                                          tracers=:c,
+                                          auxiliary_fields=(; Δtc², c⁻))
+                                       
+    if timestepper == :QuasiAdamsBashforth2 && χ !== nothing
+        model.timestepper.χ = χ
+    end
+
     set!(model, c=c₀)
     set!(model.auxiliary_fields.c⁻, c₀)
 
-    if ts == :AB2
-       Δt = 0.2 * minimum_xspacing(grid)
-    else
-       Δt = 0.2 * minimum_xspacing(grid)
+    if timestepper == :SplitRungeKutta3
+        Δt = 0.6 * minimum_xspacing(grid)
+    elseif timestepper == :QuasiAdamsBashforth2
+        Δt = 0.2 * minimum_xspacing(grid)
+    elseif timestepper == :D0
+        Δt = 1 * minimum_xspacing(grid)
     end
 
-    sim = Simulation(model; Δt, stop_time=10)
+    @show Δt
+    sim = Simulation(model; Δt, stop_time=12)
 
     ϵ = VarianceDissipation(:c, grid)
     f = Oceananigans.Models.VarianceDissipationComputations.flatten_dissipation_fields(ϵ)
 
     outputs = merge((; c = model.tracers.c, Δtc² = model.auxiliary_fields.Δtc²), f)
     add_callback!(sim, ϵ, IterationInterval(1))
+    add_callback!(sim, compute_tracer_dissipation!, IterationInterval(1))
 
-    sim.output_writers[:solution] = JLD2Writer(model, outputs;
-                                            filename="one_d_simulation_$(ts).jld2",
-                                            schedule=IterationInterval(100),
-                                            overwrite_existing=true)
+    iteration_interval = ceil(Int, 0.12 / Δt)
 
-    sim.callbacks[:compute_tracer_dissipation] = Callback(compute_tracer_dissipation!, IterationInterval(1))
+    sim.output_writers[:solution] = JLD2Writer(model, outputs;
+                                               filename="one_d_simulation_$(ts).jld2",
+                                               schedule=IterationInterval(iteration_interval),
+                                               overwrite_existing=true)
     
     run!(sim)
+
+    c    = FieldTimeSeries("one_d_simulation_$(ts).jld2", "c")
+    Δtc² = FieldTimeSeries("one_d_simulation_$(ts).jld2", "Δtc²")
+    Acx  = FieldTimeSeries("one_d_simulation_$(ts).jld2", "Acx")
+    Dcx  = FieldTimeSeries("one_d_simulation_$(ts).jld2", "Dcx")
+
+    Nt = length(c.times)
+
+    ∫closs = abs.([sum(interior(Δtc²[i], :, 1, 1) .* grid.Δxᶜᵃᵃ) for i in 2:Nt-1])
+    ∫A     = abs.([sum(interior(Acx[i] , :, 1, 1))               for i in 2:Nt-1])
+    ∫D     = abs.([sum(interior(Dcx[i] , :, 1, 1))               for i in 2:Nt-1])
+    ∫T     = ∫D .+ ∫A
+    times  = c.times[2:end-1]
+
+    return (; c, Δtc², Acx, Dcx, ∫closs, ∫A, ∫D, ∫T, times)
 end
 
-a_c    = FieldTimeSeries("one_d_simulation_AB2.jld2", "c")
-a_Δtc² = FieldTimeSeries("one_d_simulation_AB2.jld2", "Δtc²")
-a_Acx  = FieldTimeSeries("one_d_simulation_AB2.jld2", "Acx")
-a_Dcx  = FieldTimeSeries("one_d_simulation_AB2.jld2", "Dcx")
-
-r_c    = FieldTimeSeries("one_d_simulation_RK3.jld2", "c")
-r_Δtc² = FieldTimeSeries("one_d_simulation_RK3.jld2", "Δtc²")
-r_Acx  = FieldTimeSeries("one_d_simulation_RK3.jld2", "Acx")
-r_Dcx  = FieldTimeSeries("one_d_simulation_RK3.jld2", "Dcx")
-
-Nta = length(a_c.times)
-Ntr = length(r_c.times)
-
-a_∫closs = abs.([sum(interior(a_Δtc²[i], :, 1, 1) .* grid.Δxᶜᵃᵃ) for i in 2:Nta-1])
-a_∫A     = abs.([sum(interior(a_Acx[i] , :, 1, 1))               for i in 2:Nta-1])
-a_∫D     = abs.([sum(interior(a_Dcx[i] , :, 1, 1))               for i in 2:Nta-1])
-a_∫T     = a_∫D .+ a_∫A
-
-r_∫closs = abs.([sum(interior(r_Δtc²[i], :, 1, 1) .* grid.Δxᶜᵃᵃ) for i in 2:Ntr-1])
-r_∫A     = abs.([sum(interior(r_Acx[i] , :, 1, 1))               for i in 2:Ntr-1])
-r_∫D     = abs.([sum(interior(r_Dcx[i] , :, 1, 1))               for i in 2:Ntr-1])
-r_∫T     = r_∫D .+ r_∫A
-
-atimes = a_c.times[2:end-1]
-rtimes = r_c.times[2:end-1]
-
-fig = Figure()
-ax  = Axis(fig[1, 1], title="Dissipation", xlabel="Time (s)", ylabel="Dissipation", yscale=log10)
-
-scatter!(ax, atimes, a_∫closs, label="AB2 total variance loss", color=:blue)
-lines!(ax, atimes, a_∫A, label="AB2 advection dissipation", color=:red)
-lines!(ax, atimes, a_∫D, label="AB2 diffusive dissipation", color=:green)
-lines!(ax, atimes, a_∫T, label="AB2 total dissipation", color=:purple)
-
-scatter!(ax, rtimes, r_∫closs, label="RK3 total variance loss", color=:blue, marker=:diamond)
-lines!(ax, rtimes, r_∫A, label="RK3 advection dissipation", color=:red, linestyle=:dash)
-lines!(ax, rtimes, r_∫D, label="RK3 diffusive dissipation", color=:green, linestyle=:dash)
-lines!(ax, rtimes, r_∫T, label="RK3 total dissipation", color=:purple, linestyle=:dash)
-Legend(fig[1, 2], ax)
\ No newline at end of file
+using LaTeXStrings
+
+cases = Dict()
+
+cases["AB1"]  = run_simulation(:AB2, :QuasiAdamsBashforth2, χ=0.1)
+cases["RK3"]  = run_simulation(:RK3, :SplitRungeKutta3)
+cases["ICC"]  = run_simulation(:D0, :SplitRungeKutta3, velocities=PrescribedVelocityFields())
+
+fig = Figure(size = (1000, 250))
+ax  = Axis(fig[1, 1:2], 
+           xlabel=L"\text{x [m]}", 
+           ylabel=L"\text{tracer concentration [-]}",
+           xticks=([-1, -0.5, 0, 0.5, 1], latexstring.(string.([-1, -0.5, 0, 0.5, 1]))),
+           yticks=([0, 0.5, 1], latexstring.(string.([0, 0.5, 1]))))
+
+x, y, z = nodes(cases["AB1"].c)
+
+lines!(ax, x, interior(cases["ICC"].c[end], :, 1, 1), label = L"\text{initial tracer}", color = :black, linestyle = :dash, linewidth = 0.75)
+lines!(ax, x, interior(cases["AB1"].c[end], :, 1, 1), label = L"QAB2, \ \epsilon = 0.1", color = :blue)
+lines!(ax, x, interior(cases["RK3"].c[end], :, 1, 1), label = L"RK, \ M = 3", color = :red)
+axislegend(ax, position=:rt, framevisible=false)
+
+ax2 = Axis(fig[1, 3], 
+           xlabel=L"\text{time [s]}", 
+           ylabel=L"\text{integrated variance loss}",
+           xticks=([0, 2, 4, 6, 8, 10], latexstring.(string.([0, 2, 4, 6, 8, 10]))),
+           yticks=([0, 0.2, 0.4, 0.6], latexstring.(string.([0, 0.2, 0.4, 0.6]))))
+
+lines!(ax2,   cases["AB1"].times, cases["AB1"].∫T, label = L"QAB2, \ \text{calculated}", color = :blue)
+lines!(ax2,   cases["RK3"].times, cases["RK3"].∫T, label = L"RK, \ \text{calculated}", color = :red)
+scatter!(ax2, cases["AB1"].times[1:4:end], cases["AB1"].∫closs[1:4:end], label = L"QAB2, \text{computed}", color = :blue)
+scatter!(ax2, cases["RK3"].times[1:4:end], cases["RK3"].∫closs[1:4:end], label = L"RK, \text{computed}", color = :red)
+axislegend(ax2, position=:rt, framevisible=false)
+
+# for key in keys(cases)
+#     case = cases[key]
+#     scatter!(ax, case.times, case.∫closs, label="$(key) total variance loss")
+#     lines!(ax, case.times, case.∫T, label="$(key) total dissipation")
+# end
\ No newline at end of file
diff --git a/validation/implicit_free_surface/geostrophic_adjustment_test.jl b/validation/implicit_free_surface/geostrophic_adjustment_test.jl
index 1bca635155..47daa77ddb 100644
--- a/validation/implicit_free_surface/geostrophic_adjustment_test.jl
+++ b/validation/implicit_free_surface/geostrophic_adjustment_test.jl
@@ -27,13 +27,13 @@ function geostrophic_adjustment_simulation(free_surface, grid, timestepper=:Quas
     L = grid.Lx / 40 # gaussian width
     x₀ = grid.Lx / 4 # gaussian center
 
-    vᴳ(x, y, z) = -U * (x - x₀) / L * gaussian(x - x₀, L)
-    Vᴳ(x, y) = grid.Lz * vᴳ(x, y, 1) 
+    vᴳ(x, z) = -U * (x - x₀) / L * gaussian(x - x₀, L)
+    Vᴳ(x) = grid.Lz * vᴳ(x, 1) 
 
     g  = model.free_surface.gravitational_acceleration
     η₀ = model.coriolis.f * U * L / g # geostrophic free surface amplitude
 
-    ηᴳ(x, y, z) = 2 * η₀ * gaussian(x - x₀, L)
+    ηᴳ(x, z) = 2 * η₀ * gaussian(x - x₀, L)
 
     set!(model, v=vᴳ, η=ηᴳ, c=1)
 
@@ -46,16 +46,17 @@ function geostrophic_adjustment_simulation(free_surface, grid, timestepper=:Quas
         z = model.grid.z
         Oceananigans.BoundaryConditions.fill_halo_regions!(model.free_surface.η)
         parent(z.ηⁿ)   .=  parent(model.free_surface.η)
-        for i in 0:grid.Nx+1, j in 0:grid.Ny+1
-            Oceananigans.Models.HydrostaticFreeSurfaceModels.update_grid_scaling!(z.σᶜᶜⁿ, z.σᶠᶜⁿ, z.σᶜᶠⁿ, z.σᶠᶠⁿ, z.σᶜᶜ⁻, i, j, grid, z.ηⁿ)
+        for i in -1:grid.Nx+2
+            Oceananigans.Models.HydrostaticFreeSurfaceModels.update_grid_scaling!(z, i, 1, grid)
         end
+        parent(z.σᶜᶜ⁻) .= parent(z.σᶜᶜⁿ)
     end
         
-    stop_iteration=1000
+    stop_iteration=10000
 
     gravity_wave_speed = sqrt(g * grid.Lz) # hydrostatic (shallow water) gravity wave speed
     wave_propagation_time_scale = model.grid.Δxᶜᵃᵃ / gravity_wave_speed
-    simulation = Simulation(model; Δt = 0.2 * wave_propagation_time_scale, stop_iteration)
+    simulation = Simulation(model; Δt = 2 * wave_propagation_time_scale, stop_iteration)
 
     ηarr = Vector{Field}(undef, stop_iteration+1)
     varr = Vector{Field}(undef, stop_iteration+1)
@@ -66,26 +67,29 @@ function geostrophic_adjustment_simulation(free_surface, grid, timestepper=:Quas
 
     save_η(sim) = ηarr[sim.model.clock.iteration+1] = deepcopy(sim.model.free_surface.η)
     save_v(sim) = varr[sim.model.clock.iteration+1] = deepcopy(sim.model.velocities.v)
-    save_u(sim) = uarr[sim.model.clock.iteration+1] = deepcopy(sim.model.velocities.u)
+    save_u(sim) = uarr[sim.model .clock.iteration+1] = deepcopy(sim.model.velocities.u)
     save_c(sim) = carr[sim.model.clock.iteration+1] = deepcopy(sim.model.tracers.c)
-    save_w(sim) = warr[sim.model.clock.iteration+1] .= sim.model.velocities.w[1:sim.model.grid.Nx, 2, 2]
+    save_w(sim) = warr[sim.model.clock.iteration+1] .= sim.model.velocities.w[1:sim.model.grid.Nx, 1, 2]
     
     if grid isa MutableGridOfSomeKind
-        save_g(sim) = garr[sim.model.clock.iteration+1] .= sim.model.grid.z.ηⁿ[1:sim.model.grid.Nx, 2, 1]
+        save_g(sim) = garr[sim.model.clock.iteration+1] .= sim.model.grid.z.ηⁿ[1:sim.model.grid.Nx, 1, 1]
         simulation.callbacks[:save_g] = Callback(save_g, IterationInterval(1))
     end
 
+    V = KernelFunctionOperation{Center, Center, Center}(Oceananigans.Operators.Vᶜᶜᶜ, grid)
+    cav = [sum(model.tracers.c * V) / sum(V)]
+
     function progress_message(sim) 
         H = sum(sim.model.free_surface.η)
-        msg = @sprintf("[%.2f%%], iteration: %d, time: %.3f, max|w|: %.2e, sim(η): %e",
+        push!(cav, sum(model.tracers.c * V) / sum(V))
+
+        msg = @sprintf("[%.2f], iteration: %d, time: %.3f, max|u|: %.2e, sim(η): %e, ",
                         100 * sim.model.clock.time / sim.stop_time, sim.model.clock.iteration,
                         sim.model.clock.time, maximum(abs, sim.model.velocities.u), H)
 
-        if grid isa MutableGridOfSomeKind
-                msg2 = @sprintf(", max(Δη): %.2e", maximum(sim.model.grid.z.ηⁿ[1:sim.model.grid.Nx, 2, 1] .- interior(sim.model.free_surface.η)))
-                msg  = msg * msg2
-        end
-        
+        msg2 = @sprintf("drift c: %6.3e ", cav[end] - cav[1])
+        msg  = msg * msg2
+
         @info msg
     end
 
@@ -97,28 +101,29 @@ function geostrophic_adjustment_simulation(free_surface, grid, timestepper=:Quas
 
     run!(simulation)
 
-    return (η=ηarr, v=varr, u=uarr, c=carr, w=warr, g=garr), model
+    return (η=ηarr, v=varr, u=uarr, c=carr, w=warr, g=garr, cav=cav), model
 end
 
 Lh = 100kilometers
 Lz = 400meters
 
-grid = RectilinearGrid(size = (80, 3, 1),
-                       halo = (2, 2, 2),
-                       x = (0, Lh), y = (0, Lh), 
-                       z = (-Lz, 0), #MutableVerticalDiscretization((-Lz, 0)),
-                       topology = (Periodic, Periodic, Bounded))
+grid = RectilinearGrid(size = (80, 1),
+                       halo = (5, 5),
+                       x = (0, Lh),
+                       z = MutableVerticalDiscretization((-Lz, 0)), # (-Lz, 0), #  
+                       topology = (Periodic, Flat, Bounded))
+
+
+bottom(x) = x < 38kilometers && x > 26kilometers ? 0 : -Lz-1
+grid  = ImmersedBoundaryGrid(grid, GridFittedBottom(bottom))
 
-explicit_free_surface = ExplicitFreeSurface()
-implicit_free_surface = ImplicitFreeSurface()
+explicit_free_surface      = ExplicitFreeSurface()
+implicit_free_surface      = ImplicitFreeSurface()
 splitexplicit_free_surface = SplitExplicitFreeSurface(deepcopy(grid), substeps=120)
 
-seab2, sim2 = geostrophic_adjustment_simulation(splitexplicit_free_surface, deepcopy(grid))
 serk3, sim3 = geostrophic_adjustment_simulation(splitexplicit_free_surface, deepcopy(grid), :SplitRungeKutta3)
-efab2, sim4 = geostrophic_adjustment_simulation(explicit_free_surface, deepcopy(grid))
-efrk3, sim5 = geostrophic_adjustment_simulation(explicit_free_surface, deepcopy(grid), :SplitRungeKutta3)
-imab2, sim6 = geostrophic_adjustment_simulation(implicit_free_surface, deepcopy(grid))
-imrk3, sim7 = geostrophic_adjustment_simulation(implicit_free_surface, deepcopy(grid), :SplitRungeKutta3)
+# efrk3, sim5 = geostrophic_adjustment_simulation(explicit_free_surface,      deepcopy(grid), :SplitRungeKutta3)
+imrk3, sim7 = geostrophic_adjustment_simulation(implicit_free_surface,      deepcopy(grid), :SplitRungeKutta3)
 
 import Oceananigans.Fields: interior
 interior(a::Array, idx...) = a
@@ -126,11 +131,11 @@ interior(a::Array, idx...) = a
 function plot_variable(sims, var; 
                        filename="test.mp4",
                        labels=nothing,
-                       Nt=length(sims[1][var]))
+                       Nt=length(sims[1][var]),
+                       ylim = nothing)
     fig = Figure()
     ax  = Axis(fig[1, 1])
 
-
     iter = Observable(1)
     for (is, sim) in enumerate(sims)
         vi = @lift(interior(sim[var][$iter], :, 1, 1))
@@ -144,6 +149,10 @@ function plot_variable(sims, var;
 
     axislegend(ax; position=:rt)
 
+    if !isnothing(ylim)
+        ylims!(ax, ylim)
+    end
+
     record(fig, filename, 1:Nt, framerate=15) do i
         @info "Frame $i of $Nt"
         iter[] = i
@@ -177,4 +186,15 @@ function plot_variable2(sims, var1, var2;
         @info "Frame $i of $Nt"
         iter[] = i
     end
-end
\ No newline at end of file
+end
+
+# @inline dη_local(i, j, k, grid, U, V) = (Oceananigans.Operators.δxᶜᶜᶜ(i, j, k, grid, Oceananigans.Operators.Δy_qᶠᶜᶜ, U) + 
+#                                          Oceananigans.Operators.δyᶜᶜᶜ(i, j, k, grid, Oceananigans.Operators.Δx_qᶜᶠᶜ, V)) * 
+#                                          Oceananigans.Operators.Az⁻¹ᶜᶜᶜ(i, j, k, grid)
+
+# model = sim3
+# ηⁿ⁻¹ = deepcopy(model.free_surface.η)
+# time_step!(model, 10);
+# dη1 = (interior(model.free_surface.η, :, 1, 1) .- interior(ηⁿ⁻¹, :, 1, 1)) ./ 10
+# dη2 = interior(compute!(Field(KernelFunctionOperation{Center, Center, Nothing}(dη_local, grid, model.free_surface.filtered_state.Ũ, model.free_surface.filtered_state.Ṽ))),:, 1,1)
+# dη3 = interior(compute!(Field(KernelFunctionOperation{Center, Center, Nothing}(dη_local, grid, model.free_surface.barotropic_velocities.U, model.free_surface.barotropic_velocities.V))),:, 1,1)
\ No newline at end of file
diff --git a/validation/mesoscale_turbulence/baroclinic_adjustment.jl b/validation/mesoscale_turbulence/baroclinic_adjustment.jl
index 82eb68c902..561b83ff42 100644
--- a/validation/mesoscale_turbulence/baroclinic_adjustment.jl
+++ b/validation/mesoscale_turbulence/baroclinic_adjustment.jl
@@ -13,8 +13,6 @@ Oceananigans.defaults.FloatType = Float32
 filename = "baroclinic_adjustment"
 
 # Architecture
-using Metal
-metal = Metal.MetalBackend()
 # arch = GPU(metal)
 arch = CPU()
 
diff --git a/validation/z_star_coordinate/lock_release.jl b/validation/z_star_coordinate/lock_release.jl
index fa63afafde..33c3a069d6 100644
--- a/validation/z_star_coordinate/lock_release.jl
+++ b/validation/z_star_coordinate/lock_release.jl
@@ -3,33 +3,63 @@ using Oceananigans.Grids
 using Oceananigans.Units
 using Oceananigans.Utils: prettytime
 using Oceananigans.Advection: WENOVectorInvariant
+using Oceananigans.DistributedComputations
+using Oceananigans.Operators
 using Printf
+using GLMakie
+using KernelAbstractions: @kernel, @index
+
+@kernel function _compute_dissipation!(Δtc², c⁻, grid, c, Δt)
+    i, j, k = @index(Global, NTuple)
+    @inbounds begin
+        Δtc²[i, j, k] = (c[i, j, k]^2 - c⁻[i, j, k]^2) / Δt * Vᶜᶜᶜ(i, j, k, grid)
+        c⁻[i, j, k]   = c[i, j, k]
+    end
+end
+
+function compute_tracer_dissipation!(sim)
+    c    = sim.model.tracers.b
+    c⁻   = sim.model.auxiliary_fields.b⁻
+    Δtc² = sim.model.auxiliary_fields.Δtb²
+    Oceananigans.Utils.launch!(CPU(), sim.model.grid, :xyz,
+                               _compute_dissipation!,
+                               Δtc², c⁻, grid, c, sim.Δt)
 
-z_faces = MutableVerticalDiscretization((-20, 0))
+    return nothing
+end
 
-grid = RectilinearGrid(size = (128, 20),
+arch = CPU() # Distributed(CPU())#; synchronized_communication=true) 
+z_faces = MutableVerticalDiscretization((-200, 0)) # (-200, 0) # 
+
+grid = RectilinearGrid(arch; 
+                       size = (128, 20),
                           x = (0, 64kilometers),
                           z = z_faces,
                        halo = (6, 6),
                    topology = (Bounded, Flat, Bounded))
 
+bottom(x) = x < 52kilometers && x > 45kilometers ? -1000 : -2000
+grid  = ImmersedBoundaryGrid(grid, GridFittedBottom(bottom))
+
+b⁻   = CenterField(grid)
+Δtb² = CenterField(grid)
+
 model = HydrostaticFreeSurfaceModel(; grid,
-                         momentum_advection = WENO(order=5),
-                           tracer_advection = WENO(order=7),
+                         momentum_advection = Centered(),
+                           tracer_advection = WENO(),
                                    buoyancy = BuoyancyTracer(),
-                                    closure = (VerticalScalarDiffusivity(ν=1e-4), HorizontalScalarDiffusivity(ν=1.0)),
                                     tracers = (:b, :c),
-                                timestepper = :SplitRungeKutta3,
-                        vertical_coordinate = ZStarCoordinate(grid),
-                               free_surface = SplitExplicitFreeSurface(grid; substeps=20)) # 
+                                    closure = HorizontalScalarDiffusivity(ν=100),
+                                timestepper = :QuasiAdamsBashforth2,
+                               free_surface = SplitExplicitFreeSurface(grid; substeps=30),  # ImplicitFreeSurface()) # #
+                           auxiliary_fields = (; Δtb², b⁻))
 
 g = model.free_surface.gravitational_acceleration
-bᵢ(x, z) = x > 32kilometers ? 0.06 : 0.01
-
-set!(model, b = bᵢ, c = 1)
+bᵢ(x, z) = x > 20kilometers ? 6 // 100 : 1 // 100
+set!(model, b = bᵢ, c = (x, z) -> - 1)
 
 # Same timestep as in the ilicak paper
-Δt = 1
+Δt = 20 /3 # Oceananigans.defaults.FloatType(1 // 10)
 
 @info "the time step is $Δt"
 
@@ -38,55 +68,80 @@ simulation = Simulation(model; Δt, stop_time=17hours)
 Δz = zspacings(grid, Center(), Center(), Center())
 V  = KernelFunctionOperation{Center, Center, Center}(Oceananigans.Operators.Vᶜᶜᶜ, grid)
 
-field_outputs = merge(model.velocities, model.tracers, (; Δz))
-
-simulation.output_writers[:other_variables] = JLD2Writer(model, field_outputs,
-                                                         overwrite_existing = true,
-                                                         schedule = IterationInterval(100),
-                                                         filename = "zstar_model")
-
-# The two different estimates of the free-surface
-et1 = []
-et2 = []
-
 # Initial conditions
 bav = [sum(model.tracers.b * V) / sum(V)]
 cav = [sum(model.tracers.c * V) / sum(V)]
 vav = [sum(V)]
+mxc = [maximum(model.tracers.c)]
+mnc = [minimum(model.tracers.c)]
 
 function progress(sim)
     w  = interior(sim.model.velocities.w, :, :, sim.model.grid.Nz+1)
     u  = sim.model.velocities.u
+    Nx = size(grid, 1)
+
     push!(bav, sum(model.tracers.b * V) / sum(V))
     push!(cav, sum(model.tracers.c * V) / sum(V))
     push!(vav, sum(V))
+    push!(mxc, maximum(model.tracers.c))
+    push!(mnc, minimum(model.tracers.c))
 
-    msg0 = @sprintf("Time: %s iteration %d ", prettytime(sim.model.clock.time), sim.model.clock.iteration)
+    msgn = @sprintf("") #Rank: %d, ", arch.local_rank)
+    msg0 = @sprintf("Time: %s, ", prettytime(sim.model.clock.time))
     msg1 = @sprintf("extrema w: %.2e %.2e ",  maximum(w),  minimum(w))
-    msg2 = @sprintf("extrema u: %.2e %.2e ",  maximum(u),  minimum(u))
-    msg3 = @sprintf("drift b: %6.3e ", bav[end] - bav[1])
-    msg4 = @sprintf("extrema Δz: %.2e %.2e ", maximum(Δz), minimum(Δz))
-    @info msg0 * msg1 * msg2 * msg3 * msg4
+    msg2 = @sprintf("drift b: %6.3e ", bav[end] - bav[1])
+    msg4 = @sprintf("drift c: %6.3e ", cav[end] - cav[1])
 
-    push!(et1, deepcopy(interior(model.free_surface.η, :, 1, 1)))
-    push!(et2, deepcopy(model.grid.z.ηⁿ[1:128, 1, 1]))
+    @handshake @info msgn * msg0 * msg1 * msg2 * msg4
 
     return nothing
 end
 
+ϵ = Oceananigans.Models.VarianceDissipationComputations.VarianceDissipation(:b, grid)
+f = Oceananigans.Models.VarianceDissipationComputations.flatten_dissipation_fields(ϵ)
+
 simulation.callbacks[:progress] = Callback(progress, IterationInterval(100))
+simulation.callbacks[:compute_tracer_dissipation] = Callback(compute_tracer_dissipation!, IterationInterval(1))
+simulation.callbacks[:variance_dissipation_b] = Callback(ϵ, IterationInterval(1))
+
+field_outputs = merge(model.velocities, model.tracers, (; Δz), f, model.auxiliary_fields)
+
+simulation.output_writers[:other_variables] = JLD2Writer(model, field_outputs,
+                                                         overwrite_existing = true,
+                                                         schedule = IterationInterval(100),
+                                                         filename = "zstar_model")
 
 run!(simulation)
  
 b = model.tracers.b
 x, y, z = nodes(b)
 
-fig = Figure()
-ax  = Axis(fig[1, 1], title = "Integral property conservation")
-lines!(ax, (vav .- vav[1]) ./ vav[1], label = "Volume anomaly")
-lines!(ax, (bav .- bav[1]) ./ bav[1], label = "Buoyancy anomaly")
-lines!(ax, (cav .- cav[1]) ./ cav[1], label = "Tracer anomaly")
-axislegend(ax, position=:lt)
-ax  = Axis(fig[1, 2], title = "Final buoyancy field") 
-contourf!(ax, x, z, interior(model.tracers.b, :, 1, :), colormap=:balance, levels=20)
-vlines!(ax, 62.3e3, linestyle = :dash, linewidth = 3, color = :black)
+function running_mean(v, points)
+    n  = length(v)
+    rm = zeros(length(v) - 2points+1)
+    for i in points+1:n-points
+        rm[i-points] = mean(v[i - points:i+points])
+    end
+    return rm[1:end-1]
+end
+
+# fig = Figure()
+# ax  = Axis(fig[1, 1], title = "Integral property conservation")
+# lines!(ax, (vav .- vav[1]) ./ vav[1], label = "Volume anomaly")
+# lines!(ax, (bav .- bav[1]) ./ bav[1], label = "Buoyancy anomaly")
+# lines!(ax, (cav .- cav[1]) ./ cav[1], label = "Tracer anomaly")
+# axislegend(ax, position=:lt)
+# ax  = Axis(fig[1, 2], title = "Final buoyancy field") 
+# contourf!(ax, x, z, interior(model.tracers.b, :, 1, :), colormap=:balance, levels=20)
+# vlines!(ax, 62.3e3, linestyle = :dash, linewidth = 3, color = :black)
+
+Δtc² = FieldTimeSeries("zstar_model.jld2", "Δtb²")
+Acx  = FieldTimeSeries("zstar_model.jld2", "Abx")
+Acz  = FieldTimeSeries("zstar_model.jld2", "Abz")
+Dcx  = FieldTimeSeries("zstar_model.jld2", "Dbx")
+Nt   = length(Acx)
+
+∫closs = abs.([sum(Δtc²[i])               for i in 2:Nt-1])
+∫A     = abs.([sum(Acx[i]) + sum(Acz[i])  for i in 2:Nt-1])
+∫D     = abs.([sum(Dcx[i])                for i in 2:Nt-1])
+∫T     = ∫D .+ ∫A