diff --git a/src/Advection/topologically_conditional_interpolation.jl b/src/Advection/topologically_conditional_interpolation.jl index c7d1068ede4..4aad0044db4 100644 --- a/src/Advection/topologically_conditional_interpolation.jl +++ b/src/Advection/topologically_conditional_interpolation.jl @@ -20,7 +20,9 @@ using Oceananigans.Grids: AbstractGrid, const AG = AbstractGrid # topologies bounded at least on one side -const BT = Union{Bounded, RightConnected, LeftConnected} +const BT = Union{Bounded, RightConnected, LeftConnected, + LeftConnectedRightCenterFolded, LeftConnectedRightFaceFolded, + LeftConnectedRightCenterConnected, LeftConnectedRightFaceConnected} # Bounded underlying Grids const AGX = AG{<:Any, <:BT} diff --git a/src/BoundaryConditions/field_boundary_conditions.jl b/src/BoundaryConditions/field_boundary_conditions.jl index a7da506c111..194babc0498 100644 --- a/src/BoundaryConditions/field_boundary_conditions.jl +++ b/src/BoundaryConditions/field_boundary_conditions.jl @@ -21,6 +21,10 @@ default_prognostic_bc(::RightConnected, ::Center, default) = default.boundary_c default_prognostic_bc(::RightFaceFolded, ::Center, default) = default.boundary_condition default_prognostic_bc(::RightCenterFolded, ::Center, default) = default.boundary_condition +const DistributedFoldTopology = Union{LeftConnectedRightCenterFolded, LeftConnectedRightFaceFolded, + LeftConnectedRightCenterConnected, LeftConnectedRightFaceConnected} + +default_prognostic_bc(::DistributedFoldTopology, ::Center, default) = default.boundary_condition # TODO: make model constructors enforce impenetrability on velocity components to simplify this code default_prognostic_bc(::Bounded, ::Face, default) = ImpenetrableBoundaryCondition() @@ -28,6 +32,7 @@ default_prognostic_bc(::LeftConnected, ::Face, default) = ImpenetrableBoundaryC default_prognostic_bc(::RightConnected, ::Face, default) = ImpenetrableBoundaryCondition() default_prognostic_bc(::RightFaceFolded, ::Face, default) = ImpenetrableBoundaryCondition() default_prognostic_bc(::RightCenterFolded, ::Face, default) = ImpenetrableBoundaryCondition() +default_prognostic_bc(::DistributedFoldTopology, ::Face, default) = ImpenetrableBoundaryCondition() default_prognostic_bc(::Bounded, ::Nothing, default) = nothing default_prognostic_bc(::Flat, ::Nothing, default) = nothing @@ -37,6 +42,7 @@ default_prognostic_bc(::LeftConnected, ::Nothing, default) = nothing default_prognostic_bc(::RightConnected, ::Nothing, default) = nothing default_prognostic_bc(::RightFaceFolded, ::Nothing, default) = nothing default_prognostic_bc(::RightCenterFolded, ::Nothing, default) = nothing +default_prognostic_bc(::DistributedFoldTopology, ::Nothing, default) = nothing _default_auxiliary_bc(topo, loc) = default_prognostic_bc(topo, loc, DefaultBoundaryCondition()) _default_auxiliary_bc(::Bounded, ::Face) = nothing @@ -44,6 +50,7 @@ _default_auxiliary_bc(::RightConnected, ::Face) = nothing _default_auxiliary_bc(::LeftConnected, ::Face) = nothing _default_auxiliary_bc(::RightFaceFolded, ::Face) = nothing _default_auxiliary_bc(::RightCenterFolded, ::Face) = nothing +_default_auxiliary_bc(::DistributedFoldTopology, ::Face) = nothing default_auxiliary_bc(grid, ::Val{:east}, loc) = _default_auxiliary_bc(topology(grid, 1)(), loc[1]) default_auxiliary_bc(grid, ::Val{:west}, loc) = _default_auxiliary_bc(topology(grid, 1)(), loc[1]) diff --git a/src/DistributedComputations/distributed_grids.jl b/src/DistributedComputations/distributed_grids.jl index 7eded1e9d73..ea28729d51d 100644 --- a/src/DistributedComputations/distributed_grids.jl +++ b/src/DistributedComputations/distributed_grids.jl @@ -349,6 +349,26 @@ insert_connected_topology(::Type{Bounded}, R, r) = ifelse(R == 1, Bounded, insert_connected_topology(::Type{Periodic}, R, r) = ifelse(R == 1, Periodic, FullyConnected) +# Fold-aware topology insertion for distributed tripolar grids. +# These take 5 arguments: (global_y_topology, Ry, ry, Rx, rx) where +# Ry/ry are y-rank count/index and Rx/rx are x-rank count/index (all 1-based). + +local_fold_topology(::Type{RightCenterFolded}, Rx, rx) = + Rx == 1 ? LeftConnectedRightCenterFolded : LeftConnectedRightCenterConnected + +local_fold_topology(::Type{RightFaceFolded}, Rx, rx) = + Rx == 1 ? LeftConnectedRightFaceFolded : LeftConnectedRightFaceConnected + +function insert_connected_topology(T::Type{<:Union{RightCenterFolded, RightFaceFolded}}, Ry, ry, Rx, rx) + if ry == 1 + return RightConnected + elseif ry == Ry + return local_fold_topology(T, Rx, rx) + else + return FullyConnected + end +end + """ reconstruct_global_topology(T, R, r, r1, r2, arch) diff --git a/src/Grids/Grids.jl b/src/Grids/Grids.jl index 7843ee1826b..11d171ecaa9 100644 --- a/src/Grids/Grids.jl +++ b/src/Grids/Grids.jl @@ -4,6 +4,8 @@ export Center, Face export AbstractTopology, topology export Periodic, Bounded, Flat, FullyConnected, LeftConnected, RightConnected export RightFaceFolded, RightCenterFolded +export LeftConnectedRightCenterFolded, LeftConnectedRightFaceFolded +export LeftConnectedRightCenterConnected, LeftConnectedRightFaceConnected export AbstractGrid, AbstractUnderlyingGrid, halo_size, total_size export RectilinearGrid export AbstractCurvilinearGrid, AbstractHorizontallyCurvilinearGrid @@ -117,6 +119,41 @@ Grid topology for tripolar U-point pivot connection. """ struct RightCenterFolded <: AbstractTopology end +""" + LeftConnectedRightCenterFolded + +Local grid topology for the northernmost y-rank of a 1×N distributed tripolar grid +with U-point pivot (serial fold). Connected to the south neighbor on the left, +center-folded on the right (north). +""" +struct LeftConnectedRightCenterFolded <: AbstractTopology end + +""" + LeftConnectedRightFaceFolded + +Local grid y topology for the northernmost y-rank of a 1×N distributed tripolar grid +with F-point pivot (serial fold). Connected to the south neighbor on the left, +face-folded on the right (north). Face-extended (Ny+1 Face points in y). +""" +struct LeftConnectedRightFaceFolded <: AbstractTopology end + +""" + LeftConnectedRightCenterConnected + +Local grid y topology for the northernmost y-rank of an M×N distributed tripolar grid +with U-point pivot (distributed zipper). +""" +struct LeftConnectedRightCenterConnected <: AbstractTopology end + +""" + LeftConnectedRightFaceConnected + +Local grid y topology for the northernmost y-rank of an M×N distributed tripolar grid +with F-point pivot (distributed zipper). +Face-extended (Ny+1 Face points in y). +""" +struct LeftConnectedRightFaceConnected <: AbstractTopology end + ##### ##### Directions (for tilted domains) ##### diff --git a/src/Grids/grid_utils.jl b/src/Grids/grid_utils.jl index 4c17c7efbd5..d5f7112117d 100644 --- a/src/Grids/grid_utils.jl +++ b/src/Grids/grid_utils.jl @@ -40,7 +40,9 @@ end end end -const BoundedTopology = Union{Bounded, LeftConnected, RightFaceFolded} +const BoundedTopology = Union{Bounded, LeftConnected, RightFaceFolded, + LeftConnectedRightFaceFolded, + LeftConnectedRightFaceConnected} const AT = AbstractTopology Base.length(::Face, ::BoundedTopology, N) = N + 1 diff --git a/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/SplitExplicitFreeSurfaces.jl b/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/SplitExplicitFreeSurfaces.jl index 1ac4e176980..e6adc8480db 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/SplitExplicitFreeSurfaces.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/SplitExplicitFreeSurfaces.jl @@ -10,7 +10,11 @@ using Oceananigans.ImmersedBoundaries: column_depthTᶠᶜᵃ, column_depthTᶜ using Oceananigans.Operators: ∂xᵣTᶠᶜᶠ, ∂xᵣᶠᶜᶠ, ∂yᵣTᶜᶠᶠ, ∂yᵣᶜᶠᶠ, δxTᶜᵃᵃ, δxᶜᵃᵃ, δyTᵃᶜᵃ, δyᵃᶜᵃ using Oceananigans.BoundaryConditions: FieldBoundaryConditions, fill_halo_regions! using Oceananigans.Fields: Field -using Oceananigans.Grids: Center, Face, get_active_column_map, topology +using Oceananigans.Grids: Center, Face, get_active_column_map, topology, + LeftConnected, RightConnected, FullyConnected, + RightCenterFolded, RightFaceFolded, + LeftConnectedRightCenterFolded, LeftConnectedRightFaceFolded, + LeftConnectedRightCenterConnected, LeftConnectedRightFaceConnected using Oceananigans.ImmersedBoundaries: mask_immersed_field! using Oceananigans.Models.HydrostaticFreeSurfaceModels: AbstractFreeSurface, free_surface_displacement_field, diff --git a/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/split_explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/split_explicit_free_surface.jl index 9edbcf39afa..6d1083f5a74 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/split_explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/SplitExplicitFreeSurfaces/split_explicit_free_surface.jl @@ -225,7 +225,10 @@ function hydrostatic_tendency_fields(velocities, free_surface::SplitExplicitFree return merge((u=u, v=v, U=U, V=V), tracers) end -const ConnectedTopology = Union{LeftConnected, RightConnected, FullyConnected, RightCenterFolded, RightFaceFolded} +const ConnectedTopology = Union{LeftConnected, RightConnected, FullyConnected, + RightCenterFolded, RightFaceFolded, + LeftConnectedRightCenterFolded, LeftConnectedRightFaceFolded, + LeftConnectedRightCenterConnected, LeftConnectedRightFaceConnected} # Internal function for HydrostaticFreeSurfaceModel function materialize_free_surface(free_surface::SplitExplicitFreeSurface{extend_halos}, velocities, grid) where {extend_halos} @@ -403,6 +406,12 @@ split_explicit_kernel_size(::Type{LeftConnected}, N, H) = -H+2:N split_explicit_kernel_size(::Type{RightCenterFolded}, N, H) = 1:N+H-1 split_explicit_kernel_size(::Type{RightFaceFolded}, N, H) = 1:N+H-1 +# Distributed fold topologies: connected on both sides (left=MPI, right=fold/zipper) +split_explicit_kernel_size(::Type{LeftConnectedRightCenterFolded}, N, H) = -H+2:N+H-1 +split_explicit_kernel_size(::Type{LeftConnectedRightFaceFolded}, N, H) = -H+2:N+H-1 +split_explicit_kernel_size(::Type{LeftConnectedRightCenterConnected}, N, H) = -H+2:N+H-1 +split_explicit_kernel_size(::Type{LeftConnectedRightFaceConnected}, N, H) = -H+2:N+H-1 + # Adapt Adapt.adapt_structure(to, free_surface::SplitExplicitFreeSurface{extend_halos}) where {extend_halos} = SplitExplicitFreeSurface{extend_halos}(Adapt.adapt(to, free_surface.displacement), diff --git a/src/Models/NonhydrostaticModels/compute_nonhydrostatic_buffer_tendencies.jl b/src/Models/NonhydrostaticModels/compute_nonhydrostatic_buffer_tendencies.jl index 0972d277412..fb34077bb0d 100644 --- a/src/Models/NonhydrostaticModels/compute_nonhydrostatic_buffer_tendencies.jl +++ b/src/Models/NonhydrostaticModels/compute_nonhydrostatic_buffer_tendencies.jl @@ -2,6 +2,7 @@ import Oceananigans.Models: compute_buffer_tendencies! using Oceananigans.TurbulenceClosures: required_halo_size_x, required_halo_size_y using Oceananigans.Grids: XFlatGrid, YFlatGrid +using Oceananigans.Utils: worksize # TODO: the code in this file is difficult to understand. # Rewriting it may be helpful. @@ -24,44 +25,48 @@ function compute_buffer_tendencies!(model::NonhydrostaticModel) return nothing end -# tendencies need computing in the range 1 : H and N - H + 1 : N +# Tendencies need computing in the range 1 : H and W - H + 1 : W where W = worksize. +# These buffer regions complement the halo-independent interior kernel (H+1:W-H). function buffer_tendency_kernel_parameters(grid, arch) Nx, Ny, Nz = size(grid) + Wx, Wy, _ = worksize(grid) Hx, Hy, _ = halo_size(grid) - param_west = (1:Hx, 1:Ny, 1:Nz) - param_east = (Nx-Hx+1:Nx, 1:Ny, 1:Nz) - param_south = (1:Nx, 1:Hy, 1:Nz) - param_north = (1:Nx, Ny-Hy+1:Ny, 1:Nz) + param_west = (1:Hx, 1:Wy, 1:Nz) + param_east = (Wx-Hx+1:Wx, 1:Wy, 1:Nz) + param_south = (1:Wx, 1:Hy, 1:Nz) + param_north = (1:Wx, Wy-Hy+1:Wy, 1:Nz) params = (param_west, param_east, param_south, param_north) return buffer_parameters(params, grid, arch) end -# p needs computing in the range 0 : 0 and N + 1 : N + 1 +# p needs computing in the range 0 : 0 and W + 1 : W + 1 where W = worksize function buffer_p_kernel_parameters(grid, arch) Nx, Ny, _ = size(grid) + Wx, Wy, _ = worksize(grid) - param_west = (0:0, 1:Ny) - param_east = (Nx+1:Nx+1, 1:Ny) - param_south = (1:Nx, 0:0) - param_north = (1:Nx, Ny+1:Ny+1) + param_west = (0:0, 1:Wy) + param_east = (Wx+1:Wx+1, 1:Wy) + param_south = (1:Wx, 0:0) + param_north = (1:Wx, Wy+1:Wy+1) params = (param_west, param_east, param_south, param_north) return buffer_parameters(params, grid, arch) end -# closure_fields need recomputing in the range 0 : B and N - B + 1 : N + 1 +# closure_fields need recomputing in the range 0 : B and W - B + 1 : W + 1 where W = worksize function buffer_κ_kernel_parameters(grid, closure, arch) Nx, Ny, Nz = size(grid) + Wx, Wy, _ = worksize(grid) Bx = required_halo_size_x(closure) By = required_halo_size_y(closure) - param_west = (0:Bx, 1:Ny, 1:Nz) - param_east = (Nx-Bx+1:Nx+1, 1:Ny, 1:Nz) - param_south = (1:Nx, 0:By, 1:Nz) - param_north = (1:Nx, Ny-By+1:Ny+1, 1:Nz) + param_west = (0:Bx, 1:Wy, 1:Nz) + param_east = (Wx-Bx+1:Wx+1, 1:Wy, 1:Nz) + param_south = (1:Wx, 0:By, 1:Nz) + param_north = (1:Wx, Wy-By+1:Wy+1, 1:Nz) params = (param_west, param_east, param_south, param_north) return buffer_parameters(params, grid, arch) diff --git a/src/OrthogonalSphericalShellGrids/OrthogonalSphericalShellGrids.jl b/src/OrthogonalSphericalShellGrids/OrthogonalSphericalShellGrids.jl index 65e6bca9e8c..b7ff7eda1f8 100644 --- a/src/OrthogonalSphericalShellGrids/OrthogonalSphericalShellGrids.jl +++ b/src/OrthogonalSphericalShellGrids/OrthogonalSphericalShellGrids.jl @@ -7,10 +7,11 @@ import Oceananigans import Oceananigans.Architectures: on_architecture using Oceananigans.Architectures: on_architecture, AbstractArchitecture, CPU, GPU -using Oceananigans.BoundaryConditions: BoundaryCondition, UZBC -using Oceananigans.Grids: AbstractTopology, RightConnected +using Oceananigans.BoundaryConditions: BoundaryCondition +using Oceananigans.Grids: AbstractTopology using Oceananigans.Grids: halo_size, generate_coordinate, topology using Oceananigans.Grids: total_length, add_halos, fill_metric_halo_regions! +using Oceananigans.BoundaryConditions: fill_halo_regions! using Distances: haversine using Adapt: Adapt, adapt diff --git a/src/OrthogonalSphericalShellGrids/distributed_tripolar_grid.jl b/src/OrthogonalSphericalShellGrids/distributed_tripolar_grid.jl index 137d9e71610..094d8a3b368 100644 --- a/src/OrthogonalSphericalShellGrids/distributed_tripolar_grid.jl +++ b/src/OrthogonalSphericalShellGrids/distributed_tripolar_grid.jl @@ -1,4 +1,4 @@ -using Oceananigans.BoundaryConditions: DistributedCommunicationBoundaryCondition +using Oceananigans.BoundaryConditions: DistributedCommunicationBoundaryCondition, ZBC using Oceananigans.Fields: validate_indices, validate_field_data using Oceananigans.DistributedComputations: DistributedComputations, @@ -11,7 +11,10 @@ using Oceananigans.DistributedComputations: concatenate_local_sizes, communication_buffers -using Oceananigans.Grids: topology, RightConnected, FullyConnected +using Oceananigans.Grids: topology, FullyConnected, + LeftConnectedRightFaceFolded, LeftConnectedRightFaceConnected +using Oceananigans.DistributedComputations: insert_connected_topology +using Oceananigans.Utils: Utils import Oceananigans.Fields: Field, validate_indices, validate_boundary_conditions @@ -109,8 +112,14 @@ function TripolarGrid(arch::Distributed, FT::DataType=Float64; Azᶜᶠᵃ = partition_tripolar_metric(global_grid, :Azᶜᶠᵃ, irange, jrange) Azᶠᶠᵃ = partition_tripolar_metric(global_grid, :Azᶠᶠᵃ, irange, jrange) - LY = yrank == 0 ? RightConnected : FullyConnected LX = workers[1] == 1 ? Periodic : FullyConnected + + global_fold_topology = topology(global_grid, 2) + + # 1-based indices for insert_connected_topology + Rx, Ry = workers[1], workers[2] + rx, ry = xrank + 1, yrank + 1 + LY = insert_connected_topology(global_fold_topology, Ry, ry, Rx, rx) ny = nylocal[yrank+1] nx = nxlocal[xrank+1] @@ -228,9 +237,6 @@ function receiving_rank(arch; receive_idx_x = ranks(arch)[1] - arch.local_index[ return receive_rank end -# a distributed `TripolarGrid` needs a `UPivotZipperBoundaryCondition` for the north boundary -# only on the last rank -# TODO: generalize to any ZipperBoundaryCondition function regularize_field_boundary_conditions(bcs::FieldBoundaryConditions, grid::MPITripolarGridOfSomeKind, field_name::Symbol, @@ -248,7 +254,7 @@ function regularize_field_boundary_conditions(bcs::FieldBoundaryConditions, south = regularize_boundary_condition(bcs.south, grid, loc, 2, LeftBoundary, prognostic_names) north = if yrank == processor_size[2] - 1 && processor_size[1] == 1 - UPivotZipperBoundaryCondition(sign) + north_fold_boundary_condition(fold_topology(grid.conformal_mapping))(sign) elseif yrank == processor_size[2] - 1 && processor_size[1] != 1 from = arch.local_rank @@ -280,26 +286,20 @@ function Field(loc::Tuple{<:LX, <:LY, <:LZ}, grid::MPITripolarGridOfSomeKind, da validate_field_data(loc, data, grid, indices) validate_boundary_conditions(loc, grid, old_bcs) - default_zipper = UPivotZipperBoundaryCondition(sign(LX, LY)) - if isnothing(old_bcs) || ismissing(old_bcs) new_bcs = old_bcs else new_bcs = inject_halo_communication_boundary_conditions(old_bcs, loc, arch.local_rank, arch.connectivity, topology(grid)) - # North boundary conditions are "special". If we are at the top of the domain, i.e. - # the last rank, then we need to substitute the BC only if the old one is not already - # a zipper boundary condition. Otherwise we always substitute because we need to - # inject the halo boundary conditions. if yrank == processor_size[2] - 1 && processor_size[1] == 1 - north_bc = if !(old_bcs.north isa UZBC) - default_zipper + north_bc = if !(old_bcs.north isa ZBC) + north_fold_boundary_condition(fold_topology(grid.conformal_mapping))(sign(LX, LY)) else old_bcs.north end elseif yrank == processor_size[2] - 1 && processor_size[1] != 1 - sgn = old_bcs.north isa UZBC ? old_bcs.north.condition : sign(LX, LY) + sgn = old_bcs.north isa ZBC ? old_bcs.north.condition : sign(LX, LY) from = arch.local_rank to = arch.connectivity.north halo_communication = ZipperHaloCommunicationRanks(sgn; from, to) @@ -317,7 +317,7 @@ function Field(loc::Tuple{<:LX, <:LY, <:LZ}, grid::MPITripolarGridOfSomeKind, da bottom=new_bcs.bottom) end - buffers = communication_buffers(grid, data, new_bcs) + buffers = communication_buffers(grid, data, new_bcs, (LX(), LY(), LZ())) return Field{LX, LY, LZ}(grid, data, new_bcs, indices, op, status, buffers) end @@ -347,7 +347,8 @@ function DistributedComputations.reconstruct_global_grid(grid::MPITripolarGrid) north_poles_latitude, first_pole_longitude, southernmost_latitude, - z) + z, + fold_topology = fold_topology(grid.conformal_mapping)) end function Grids.with_halo(new_halo, old_grid::MPITripolarGrid) @@ -361,12 +362,22 @@ function Grids.with_halo(new_halo, old_grid::MPITripolarGrid) north_poles_latitude = old_grid.conformal_mapping.north_poles_latitude first_pole_longitude = old_grid.conformal_mapping.first_pole_longitude southernmost_latitude = old_grid.conformal_mapping.southernmost_latitude - return TripolarGrid(arch, eltype(old_grid); halo = new_halo, size = N, north_poles_latitude, first_pole_longitude, southernmost_latitude, - z) + z, + fold_topology = fold_topology(old_grid.conformal_mapping)) end + +##### +##### Extend worksize for distributed FPivot grids (matches RFTRG worksize for serial FPivot) +##### + +const DistributedFPivotTopology = Union{LeftConnectedRightFaceFolded, LeftConnectedRightFaceConnected} +const DRFTRG = Union{MPITripolarGrid{<:Any, <:Any, <:DistributedFPivotTopology}, + ImmersedBoundaryGrid{<:Any, <:Any, <:DistributedFPivotTopology, <:Any, <:MPITripolarGrid}} + +Utils.worksize(grid::DRFTRG) = grid.Nx, grid.Ny+1, grid.Nz diff --git a/src/OrthogonalSphericalShellGrids/distributed_zipper.jl b/src/OrthogonalSphericalShellGrids/distributed_zipper.jl index 4c80de8be28..6e13930f4fa 100644 --- a/src/OrthogonalSphericalShellGrids/distributed_zipper.jl +++ b/src/OrthogonalSphericalShellGrids/distributed_zipper.jl @@ -1,108 +1,408 @@ -using Oceananigans.BoundaryConditions: get_boundary_kernels, DistributedCommunication -using Oceananigans.DistributedComputations: cooperative_waitall!, recv_from_buffers!, distributed_fill_halo_event! -using Oceananigans.DistributedComputations: CommunicationBuffers, fill_corners!, loc_id, AsynchronousDistributed -using Oceananigans.Fields: instantiated_location +using Oceananigans.BoundaryConditions: DistributedCommunication +using Oceananigans.DistributedComputations: CommunicationBuffers, loc_id +using Oceananigans.Grids: AbstractGrid, topology, + RightCenterFolded, RightFaceFolded, + LeftConnectedRightCenterFolded, LeftConnectedRightFaceFolded, + LeftConnectedRightCenterConnected, LeftConnectedRightFaceConnected +using Oceananigans.DistributedComputations: Distributed, on_architecture, ranks, x_communication_buffer -import Oceananigans.BoundaryConditions: fill_halo_regions! -import Oceananigans.DistributedComputations: synchronize_communication! +import Oceananigans.DistributedComputations: + y_communication_buffer, corner_communication_buffer, + _fill_north_send_buffer!, _recv_from_north_buffer!, + _fill_northwest_send_buffer!, _fill_northeast_send_buffer!, + _recv_from_northwest_buffer!, _recv_from_northeast_buffer!, + _fill_west_send_buffer!, _fill_east_send_buffer!, + _recv_from_west_buffer!, _recv_from_east_buffer! +import Oceananigans.Fields: communication_buffers @inline instantiate(T::DataType) = T() @inline instantiate(T) = T const DistributedZipper = BoundaryCondition{<:DistributedCommunication, <:ZipperHaloCommunicationRanks} -switch_north_halos!(c, north_bc, grid, loc) = nothing +##### +##### Topology unions for dispatch +##### -function switch_north_halos!(c, north_bc::DistributedZipper, grid, loc) - sign = north_bc.condition.sign - hz = halo_size(grid) - sz = size(grid) +const UPivotTopology = Union{RightCenterFolded, + LeftConnectedRightCenterFolded, + LeftConnectedRightCenterConnected} +const FPivotTopology = Union{RightFaceFolded, + LeftConnectedRightFaceFolded, + LeftConnectedRightFaceConnected} - _switch_north_halos!(parent(c), loc, sign, sz, hz) +const TwoDFoldTopology = Union{LeftConnectedRightCenterConnected, + LeftConnectedRightFaceConnected} - return nothing +# UPivot fold line is at Center-y; FPivot fold line is at Face-y +has_fold_line(::Type{<:UPivotTopology}, ::Center) = true +has_fold_line(::Type{<:UPivotTopology}, ::Face) = false +has_fold_line(::Type{<:FPivotTopology}, ::Center) = false +has_fold_line(::Type{<:FPivotTopology}, ::Face) = true + +# The fold has two pivot points due to x-periodicity: +# 1st pivot: between rx = Rx÷2 and rx = Rx÷2+1 +# 2nd pivot: at the periodic boundary between rx = Rx and rx = 1 +# The serial ifelse(i > Nx÷2, ...) writes the east half only. +# Corner conditions account for periodic wrap at rx=1 (NW) and rx=Rx (NE). + +# North buffer: east-of-pivot ranks write fold line +function north_writes_fold_line(arch) + rx = arch.local_index[1] + Rx = ranks(arch)[1] + return rx > Rx ÷ 2 +end + +# NW corner: west edge past pivot, or rx=1 (periodic wrap to east half) +function northwest_writes_fold_line(arch) + rx = arch.local_index[1] + Rx = ranks(arch)[1] + return rx > Rx ÷ 2 + 1 || rx == 1 +end + +# NE corner: east edge past pivot, but not rx=Rx (periodic wrap to west half) +function northeast_writes_fold_line(arch) + rx = arch.local_index[1] + Rx = ranks(arch)[1] + return rx >= Rx ÷ 2 && rx < Rx +end + +##### +##### Zipper communication buffers with fold-aware packing +##### + +struct TwoDZipperBuffer{FL, WFL, Loc, FoT, B, S} + send :: B + recv :: B + sign :: S +end + +struct ZipperCornerBuffer{FL, WFL, Loc, FoT, B, S} + send :: B + recv :: B + sign :: S +end + +# Value-argument constructors: all types inferred +TwoDZipperBuffer(loc, fot, send, recv, sign, ::Val{FL}, ::Val{WFL}) where {FL, WFL} = + TwoDZipperBuffer{FL, WFL, typeof(loc), typeof(fot), typeof(send), typeof(sign)}(send, recv, sign) +ZipperCornerBuffer(loc, fot, send, recv, sign, ::Val{FL}, ::Val{WFL}) where {FL, WFL} = + ZipperCornerBuffer{FL, WFL, typeof(loc), typeof(fot), typeof(send), typeof(sign)}(send, recv, sign) +ZipperCornerBuffer(::Type{Loc}, ::Type{FoT}, send, recv, sign, ::Val{FL}, ::Val{WFL}) where {Loc, FoT, FL, WFL} = + ZipperCornerBuffer{FL, WFL, Loc, FoT, typeof(send), typeof(sign)}(send, recv, sign) + +Adapt.adapt_structure(to, buff::TwoDZipperBuffer) = nothing +Adapt.adapt_structure(to, buff::ZipperCornerBuffer) = nothing + +# X-direction buffer for tripolar grids: like TwoDBuffer but with location-aware y-size +# and fold-line awareness. FL/WFL mirror the corner buffer pattern: +# - FL: buffer has fold-line row (same as corners, for MPI size matching) +# - WFL: recv writes the fold-line row (complement of the adjacent corner) +# West WFL = !NW corner WFL, East WFL = !NE corner WFL. +struct TripolarXBuffer{B, FL, WFL} + send :: B + recv :: B end -@inline reversed_halos(::Tuple{<:Any, <:Center, <:Any}, Ny, Hy) = Ny+2Hy-1:-1:Ny+Hy+1 -@inline reversed_halos(::Tuple{<:Any, <:Face, <:Any}, Ny, Hy) = Ny+2Hy:-1:Ny+Hy+2 +Adapt.adapt_structure(to, buff::TripolarXBuffer) = nothing -@inline adjust_x_face!(c, loc, north_halos, Px) = nothing -@inline adjust_x_face!(c, ::Tuple{<:Face, <:Any, <:Any}, north_halos, Px) = view(c, 2:Px, north_halos, :) .= view(c, 1:Px-1, north_halos, :) +TripolarXBuffer(send, recv, ::Val{FL}, ::Val{WFL}) where {FL, WFL} = + TripolarXBuffer{typeof(send), FL, WFL}(send, recv) -# We throw away the first point! -@inline function _switch_north_halos!(c, loc, sign, (Nx, Ny, Nz), (Hx, Hy, Hz)) +# X-buffers WFL: complement of the ADJACENT corner. +west_writes_fold_line(arch) = !northwest_writes_fold_line(arch) +east_writes_fold_line(arch) = !northeast_writes_fold_line(arch) - # Domain indices common for all locations - north_halos = Ny+Hy+1:Ny+2Hy-1 - west_corner = 1:Hx - east_corner = Nx+Hx+1:Nx+2Hx - interior = Hx+1:Nx+Hx +# TripolarXBuffer send: always pack full buffer (Ny_buf rows) +_fill_west_send_buffer!(c, buff::TripolarXBuffer, Hx, Hy, Nx, Ny) = + buff.send .= view(c, 1+Hx:2Hx, 1+Hy:size(buff.send,2)+Hy, :) +_fill_east_send_buffer!(c, buff::TripolarXBuffer, Hx, Hy, Nx, Ny) = + buff.send .= view(c, 1+Nx:Nx+Hx, 1+Hy:size(buff.send,2)+Hy, :) - # Location - dependent halo indices - reversed_north_halos = reversed_halos(loc, Ny, Hy) +# TripolarXBuffer recv FL=false: no fold line, write all Ny_buf rows +_recv_from_west_buffer!(c, buff::TripolarXBuffer{<:Any, false}, Hx, Hy, Nx, Ny) = + view(c, 1:Hx, 1+Hy:size(buff.recv,2)+Hy, :) .= buff.recv +_recv_from_east_buffer!(c, buff::TripolarXBuffer{<:Any, false}, Hx, Hy, Nx, Ny) = + view(c, 1+Nx+Hx:Nx+2Hx, 1+Hy:size(buff.recv,2)+Hy, :) .= buff.recv - view(c, west_corner, north_halos, :) .= sign .* reverse(view(c, west_corner, reversed_north_halos, :), dims = 1) - view(c, east_corner, north_halos, :) .= sign .* reverse(view(c, east_corner, reversed_north_halos, :), dims = 1) - view(c, interior, north_halos, :) .= sign .* reverse(view(c, interior, reversed_north_halos, :), dims = 1) +# TripolarXBuffer recv FL=true, WFL=true: write all Ny_buf rows (fold line included) +_recv_from_west_buffer!(c, buff::TripolarXBuffer{<:Any, true, true}, Hx, Hy, Nx, Ny) = + view(c, 1:Hx, 1+Hy:size(buff.recv,2)+Hy, :) .= buff.recv +_recv_from_east_buffer!(c, buff::TripolarXBuffer{<:Any, true, true}, Hx, Hy, Nx, Ny) = + view(c, 1+Nx+Hx:Nx+2Hx, 1+Hy:size(buff.recv,2)+Hy, :) .= buff.recv - # throw out first point for the x - face locations - adjust_x_face!(c, loc, north_halos, size(c, 1)) +# TripolarXBuffer recv FL=true, WFL=false: skip last row (the fold line) +_recv_from_west_buffer!(c, buff::TripolarXBuffer{<:Any, true, false}, Hx, Hy, Nx, Ny) = + view(c, 1:Hx, 1+Hy:size(buff.recv,2)-1+Hy, :) .= view(buff.recv, :, 1:size(buff.recv,2)-1, :) +_recv_from_east_buffer!(c, buff::TripolarXBuffer{<:Any, true, false}, Hx, Hy, Nx, Ny) = + view(c, 1+Nx+Hx:Nx+2Hx, 1+Hy:size(buff.recv,2)-1+Hy, :) .= view(buff.recv, :, 1:size(buff.recv,2)-1, :) - return nothing +# Fold-aware x-buffer constructors. +# Fallback for non-zipper north (south ranks): standard x-communication. +# Separate west/east constructors since they complement different corners. +function west_tripolar_buffer(arch, grid, data, Hx, bc, loc, + north::TwoDZipperBuffer{<:Any, <:Any, Loc, FoT}) where {Loc, FoT} + loc_y = Loc.parameters[2]() + fl = has_fold_line(FoT, loc_y) + Ny_buf = length(loc_y, FoT(), size(grid, 2)) + wfl = fl && west_writes_fold_line(arch) + _, _, Tz = size(parent(data)) + FT = eltype(data) + send = on_architecture(arch, zeros(FT, Hx, Ny_buf, Tz)) + recv = on_architecture(arch, zeros(FT, Hx, Ny_buf, Tz)) + return TripolarXBuffer(send, recv, Val(fl), Val(wfl)) end -# Disambiguation -fill_halo_regions!(c::OffsetArray, ::Nothing, indices, loc, ::MPITripolarGridOfSomeKind, args...; kwargs...) = nothing +function east_tripolar_buffer(arch, grid, data, Hx, bc, loc, + north::TwoDZipperBuffer{<:Any, <:Any, Loc, FoT}) where {Loc, FoT} + loc_y = Loc.parameters[2]() + fl = has_fold_line(FoT, loc_y) + Ny_buf = length(loc_y, FoT(), size(grid, 2)) + wfl = fl && east_writes_fold_line(arch) + _, _, Tz = size(parent(data)) + FT = eltype(data) + send = on_architecture(arch, zeros(FT, Hx, Ny_buf, Tz)) + recv = on_architecture(arch, zeros(FT, Hx, Ny_buf, Tz)) + return TripolarXBuffer(send, recv, Val(fl), Val(wfl)) +end + +# Fallback when north is not a zipper (south ranks) +west_tripolar_buffer(arch, grid, data, Hx, bc, loc, north) = x_communication_buffer(arch, grid, data, Hx, bc) +east_tripolar_buffer(arch, grid, data, Hx, bc, loc, north) = x_communication_buffer(arch, grid, data, Hx, bc) -function fill_halo_regions!(c::OffsetArray, bcs, indices, loc, grid::MPITripolarGridOfSomeKind, buffers::CommunicationBuffers, args...; kwargs...) +##### +##### Buffer construction for tripolar grids +##### +function communication_buffers(grid::MPITripolarGridOfSomeKind, data, bcs, loc) + Hx, Hy, Hz = halo_size(grid) arch = architecture(grid) - kernels!, ordered_bcs = get_boundary_kernels(bcs, c, grid, loc, indices) - number_of_tasks = length(kernels!) - outstanding_requests = length(arch.mpi_requests) + south = y_communication_buffer(arch, grid, data, Hy, bcs.south) + north = y_tripolar_buffer(arch, grid, data, Hy, bcs.north, loc) + + # x-buffers: separate west/east since they complement different corners (NW/NE) + west = west_tripolar_buffer(arch, grid, data, Hx, bcs.west, loc, north) + east = east_tripolar_buffer(arch, grid, data, Hx, bcs.east, loc, north) + + + sw = corner_communication_buffer(arch, grid, data, Hx, Hy, west, south) + se = corner_communication_buffer(arch, grid, data, Hx, Hy, east, south) + nw = northwest_tripolar_buffer(arch, grid, data, Hx, Hy, west, north) + ne = northeast_tripolar_buffer(arch, grid, data, Hx, Hy, east, north) + + return CommunicationBuffers(west, east, south, north, sw, se, nw, ne) +end + +# Fallback: non-zipper north BC uses standard buffer +y_tripolar_buffer(arch, grid, data, Hy, bc, loc) = y_communication_buffer(arch, grid, data, Hy, bc) + +# 2D fold (MxN) → TwoDZipperBuffer (interior-width, Hy′ = Hy or Hy+1 rows for fold line) +function y_tripolar_buffer(arch, grid::AbstractGrid{<:Any, <:Any, Topo}, + data, Hy, bc::DistributedZipper, loc::Loc) where {Topo <: TwoDFoldTopology, Loc} + Nx = size(grid, 1) + _, _, Tz = size(parent(data)) + FT = eltype(data) + sgn = bc.condition.sign + fl = has_fold_line(Topo, loc[2]) + Hy′ = fl ? Hy + 1 : Hy + wfl = fl && north_writes_fold_line(arch) + send = on_architecture(arch, zeros(FT, Nx, Hy′, Tz)) + recv = on_architecture(arch, zeros(FT, Nx, Hy′, Tz)) + return TwoDZipperBuffer(loc, Topo(), send, recv, sgn, Val(fl), Val(wfl)) +end + +# Fallbacks: non-zipper corners +northwest_tripolar_buffer(arch, grid, data, Hx, Hy, xedge, yedge) = corner_communication_buffer(arch, grid, data, Hx, Hy, xedge, yedge) +northeast_tripolar_buffer(arch, grid, data, Hx, Hy, xedge, yedge) = corner_communication_buffer(arch, grid, data, Hx, Hy, xedge, yedge) + +# Corner buffers are only needed for 2D (MxN) partitions. +# FL (fold line in buffer) is true for ALL corners when has_fold_line, to ensure +# MPI size matching between mirror partners. WFL (writes fold line) is per-corner, +# based on whether the corner's global x-position is past the pivot. + +function northwest_tripolar_buffer(arch, grid, data, Hx, Hy, xedge, + yedge::TwoDZipperBuffer{<:Any, <:Any, Loc, FoT}) where {Loc, FoT} + Tz = size(parent(data), 3); FT = eltype(data); sgn = yedge.sign + loc_x_inst = instantiate(Loc.parameters[1]) + fl = has_fold_line(FoT, Loc.parameters[2]()) + Hy′ = fl ? Hy + 1 : Hy + wfl = fl && northwest_writes_fold_line(arch) + Hx′ = nw_corner_nx(loc_x_inst, Hx) + send = on_architecture(arch, zeros(FT, Hx′, Hy′, Tz)) + recv = on_architecture(arch, zeros(FT, Hx′, Hy′, Tz)) + return ZipperCornerBuffer(Loc, FoT, send, recv, sgn, Val(fl), Val(wfl)) +end + +function northeast_tripolar_buffer(arch, grid, data, Hx, Hy, xedge, + yedge::TwoDZipperBuffer{<:Any, <:Any, Loc, FoT}) where {Loc, FoT} + Tz = size(parent(data), 3); FT = eltype(data); sgn = yedge.sign + loc_x_inst = instantiate(Loc.parameters[1]) + fl = has_fold_line(FoT, Loc.parameters[2]()) + Hy′ = fl ? Hy + 1 : Hy + wfl = fl && northeast_writes_fold_line(arch) + Hx′ = ne_corner_nx(loc_x_inst, Hx) + send = on_architecture(arch, zeros(FT, Hx′, Hy′, Tz)) + recv = on_architecture(arch, zeros(FT, Hx′, Hy′, Tz)) + return ZipperCornerBuffer(Loc, FoT, send, recv, sgn, Val(fl), Val(wfl)) +end + +##### +##### Location aliases for dispatch +##### + +const CC = Tuple{<:Center, <:Center, <:Any} +const FC = Tuple{<:Face, <:Center, <:Any} +const CF = Tuple{<:Center, <:Face, <:Any} +const FF = Tuple{<:Face, <:Face, <:Any} + +##### +##### Helper functions: compute ranges from location and topology +##### + +# Type-parameter accessors +@inline loc_x(::TwoDZipperBuffer{<:Any, <:Any, Loc}) where Loc = instantiate(Loc.parameters[1]) +@inline loc_y(::TwoDZipperBuffer{<:Any, <:Any, Loc}) where Loc = instantiate(Loc.parameters[2]) +@inline fold_topo(::TwoDZipperBuffer{<:Any, <:Any, <:Any, FoT}) where FoT = FoT() + +@inline loc_x(::ZipperCornerBuffer{<:Any, <:Any, Loc}) where Loc = instantiate(Loc.parameters[1]) +@inline loc_y(::ZipperCornerBuffer{<:Any, <:Any, Loc}) where Loc = instantiate(Loc.parameters[2]) +@inline fold_topo(::ZipperCornerBuffer{<:Any, <:Any, <:Any, FoT}) where FoT = FoT() + +# Send y-ranges (source rows, reversed for fold) +@inline send_fold_y(::UPivotTopology, ::Center, Hy, Ny) = Ny + Hy +@inline send_fold_y(::FPivotTopology, ::Face, Hy, Ny) = Ny + 1 + Hy + +@inline send_halo_y(::UPivotTopology, ::Center, Hy, Ny) = Ny+Hy-1:-1:Ny +@inline send_halo_y(topo, loc_y, Hy, Ny) = Ny+Hy:-1:Ny+1 + +# Recv y-ranges (destination rows in parent) +@inline recv_fold_y(::UPivotTopology, ::Center, Hy, Ny) = Ny + Hy +@inline recv_fold_y(::FPivotTopology, ::Face, Hy, Ny) = Ny + 1 + Hy + +@inline recv_halo_y(::FPivotTopology, ::Face, Hy, Ny) = 2+Ny+Hy:1+Ny+2Hy +@inline recv_halo_y(topo, loc_y, Hy, Ny) = 1+Ny+Hy:Ny+2Hy + +# Recv x-range for north buffer +@inline recv_x_range(::Center, Hx, Nx) = 1+Hx:Nx+Hx +@inline recv_x_range(::Face, Hx, Nx) = 2+Hx:Nx+Hx+1 + +# Corner send x-ranges (reversed for fold) +@inline nw_send_x(::Center, Hx, Nx) = 2Hx:-1:1+Hx +@inline nw_send_x(::Face, Hx, Nx) = 2Hx+1:-1:1+Hx +@inline ne_send_x(::Center, Hx, Nx) = Nx+Hx:-1:1+Nx +@inline ne_send_x(::Face, Hx, Nx) = Nx+Hx:-1:Nx+2 + +# Corner recv x-ranges +@inline nw_recv_x(::Center, Hx, Nx) = 1:Hx +@inline nw_recv_x(::Face, Hx, Nx) = 1:Hx+1 +@inline ne_recv_x(::Center, Hx, Nx) = 1+Nx+Hx:Nx+2Hx +@inline ne_recv_x(::Face, Hx, Nx) = 2+Nx+Hx:Nx+2Hx + +# Corner buffer x-size (for constructors) +@inline nw_corner_nx(::Center, Hx) = Hx +@inline nw_corner_nx(::Face, Hx) = Hx + 1 +@inline ne_corner_nx(::Center, Hx) = Hx +@inline ne_corner_nx(::Face, Hx) = Hx - 1 + +##### +##### TwoDZipperBuffer: north send (FL=true has fold line, FL=false does not) +##### - for task = 1:number_of_tasks - @inbounds distributed_fill_halo_event!(c, kernels![task], ordered_bcs[task], loc, grid, buffers, args...; kwargs...) - end +function _fill_north_send_buffer!(c, b::TwoDZipperBuffer{true}, Hx, Hy, Nx, Ny) + topo, ly = fold_topo(b), loc_y(b) + fy = send_fold_y(topo, ly, Hy, Ny) + view(b.send, :, 1:1, :) .= b.sign .* view(c, Nx+Hx:-1:1+Hx, fy:fy, :) + view(b.send, :, 2:Hy+1, :) .= b.sign .* view(c, Nx+Hx:-1:1+Hx, send_halo_y(topo, ly, Hy, Ny), :) +end - fill_corners!(c, arch.connectivity, indices, loc, arch, grid, buffers, args...; kwargs...) +function _fill_north_send_buffer!(c, b::TwoDZipperBuffer{false}, Hx, Hy, Nx, Ny) + topo, ly = fold_topo(b), loc_y(b) + b.send .= b.sign .* view(c, Nx+Hx:-1:1+Hx, send_halo_y(topo, ly, Hy, Ny), :) +end - # We increment the request counter only if we have actually initiated the MPI communication. - # This is the case only if at least one of the boundary conditions is a distributed communication - # boundary condition (DCBCT) _and_ the `only_local_halos` keyword argument is false. - if length(arch.mpi_requests) > outstanding_requests - arch.mpi_tag[] += 1 - end +##### +##### TwoDZipperBuffer: north recv (FL × WFL dispatch) +##### - if arch.mpi_tag[] == 0 # The communication has been reset, switch the north halos! - north_bc = bcs.north - switch_north_halos!(c, north_bc, grid, loc) - end +function _recv_from_north_buffer!(c, buff::TwoDZipperBuffer{true, true}, Hx, Hy, Nx, Ny) + xr = recv_x_range(loc_x(buff), Hx, Nx) + topo, ly = fold_topo(buff), loc_y(buff) + fy = recv_fold_y(topo, ly, Hy, Ny) + view(c, xr, fy:fy , :) .= view(buff.recv, :, 1:1 , :) + view(c, xr, recv_halo_y(topo, ly, Hy, Ny), :) .= view(buff.recv, :, 2:Hy+1, :) +end - return nothing +function _recv_from_north_buffer!(c, buff::TwoDZipperBuffer{true, false}, Hx, Hy, Nx, Ny) + xr = recv_x_range(loc_x(buff), Hx, Nx) + view(c, xr, recv_halo_y(fold_topo(buff), loc_y(buff), Hy, Ny), :) .= view(buff.recv, :, 2:Hy+1, :) end -function synchronize_communication!(field::Field{<:Any, <:Any, <:Any, <:Any, <:MPITripolarGridOfSomeKind}) - arch = architecture(field.grid) +function _recv_from_north_buffer!(c, buff::TwoDZipperBuffer{false}, Hx, Hy, Nx, Ny) + xr = recv_x_range(loc_x(buff), Hx, Nx) + view(c, xr, recv_halo_y(fold_topo(buff), loc_y(buff), Hy, Ny), :) .= buff.recv +end - if arch isa AsynchronousDistributed # Otherwise no need to synchronize - # Wait for outstanding requests - if !isempty(arch.mpi_requests) - cooperative_waitall!(arch.mpi_requests) +##### +##### Corner send: NW and NE (FL=true includes fold line, FL=false halo only) +##### - # Reset MPI tag - arch.mpi_tag[] = 0 +function _fill_northwest_send_buffer!(c, b::ZipperCornerBuffer{true}, Hx, Hy, Nx, Ny) + topo = fold_topo(b) + ly = loc_y(b) + fy = send_fold_y(topo, ly, Hy, Ny) + b.send .= b.sign .* view(c, nw_send_x(loc_x(b), Hx, Nx), fy:-1:fy-Hy, :) +end - # Reset MPI requests - empty!(arch.mpi_requests) - end +function _fill_northwest_send_buffer!(c, b::ZipperCornerBuffer{false}, Hx, Hy, Nx, Ny) + topo = fold_topo(b) + fy = send_halo_y(topo, loc_y(b), Hy, Ny) + b.send .= b.sign .* view(c, nw_send_x(loc_x(b), Hx, Nx), fy, :) +end - recv_from_buffers!(field.data, field.communication_buffers, field.grid) +function _fill_northeast_send_buffer!(c, b::ZipperCornerBuffer{true}, Hx, Hy, Nx, Ny) + topo = fold_topo(b) + ly = loc_y(b) + fy = send_fold_y(topo, ly, Hy, Ny) + b.send .= b.sign .* view(c, ne_send_x(loc_x(b), Hx, Nx), fy:-1:fy-Hy, :) +end - north_bc = field.boundary_conditions.north - switch_north_halos!(field, north_bc, field.grid, instantiated_location(field)) - end +function _fill_northeast_send_buffer!(c, b::ZipperCornerBuffer{false}, Hx, Hy, Nx, Ny) + topo = fold_topo(b) + fy = send_halo_y(topo, loc_y(b), Hy, Ny) + b.send .= b.sign .* view(c, ne_send_x(loc_x(b), Hx, Nx), fy, :) +end - return nothing +##### +##### Corner recv: NW and NE (FL × WFL dispatch) +##### + +function _recv_from_northwest_buffer!(c, buff::ZipperCornerBuffer{true, true}, Hx, Hy, Nx, Ny) + xr = nw_recv_x(loc_x(buff), Hx, Nx) + topo, ly = fold_topo(buff), loc_y(buff) + fy = recv_fold_y(topo, ly, Hy, Ny) + view(c, xr, fy:fy, :) .= view(buff.recv, :, 1:1, :) + view(c, xr, recv_halo_y(topo, ly, Hy, Ny), :) .= view(buff.recv, :, 2:size(buff.recv,2), :) end + +_recv_from_northwest_buffer!(c, buff::ZipperCornerBuffer{true, false}, Hx, Hy, Nx, Ny) = + view(c, nw_recv_x(loc_x(buff), Hx, Nx), recv_halo_y(fold_topo(buff), loc_y(buff), Hy, Ny), :) .= + view(buff.recv, :, 2:size(buff.recv,2), :) + +_recv_from_northwest_buffer!(c, buff::ZipperCornerBuffer{false}, Hx, Hy, Nx, Ny) = + view(c, nw_recv_x(loc_x(buff), Hx, Nx), recv_halo_y(fold_topo(buff), loc_y(buff), Hy, Ny), :) .= buff.recv + +function _recv_from_northeast_buffer!(c, buff::ZipperCornerBuffer{true, true}, Hx, Hy, Nx, Ny) + xr = ne_recv_x(loc_x(buff), Hx, Nx) + topo, ly = fold_topo(buff), loc_y(buff) + fy = recv_fold_y(topo, ly, Hy, Ny) + view(c, xr, fy:fy, :) .= view(buff.recv, :, 1:1, :) + view(c, xr, recv_halo_y(topo, ly, Hy, Ny), :) .= view(buff.recv, :, 2:size(buff.recv,2), :) +end + +_recv_from_northeast_buffer!(c, buff::ZipperCornerBuffer{true, false}, Hx, Hy, Nx, Ny) = + view(c, ne_recv_x(loc_x(buff), Hx, Nx), recv_halo_y(fold_topo(buff), loc_y(buff), Hy, Ny), :) .= + view(buff.recv, :, 2:size(buff.recv,2), :) + +_recv_from_northeast_buffer!(c, buff::ZipperCornerBuffer{false}, Hx, Hy, Nx, Ny) = + view(c, ne_recv_x(loc_x(buff), Hx, Nx), recv_halo_y(fold_topo(buff), loc_y(buff), Hy, Ny), :) .= buff.recv diff --git a/src/OrthogonalSphericalShellGrids/tripolar_field_extensions.jl b/src/OrthogonalSphericalShellGrids/tripolar_field_extensions.jl index d619ee50fb5..046731b0d31 100644 --- a/src/OrthogonalSphericalShellGrids/tripolar_field_extensions.jl +++ b/src/OrthogonalSphericalShellGrids/tripolar_field_extensions.jl @@ -4,7 +4,9 @@ using Oceananigans.BoundaryConditions: FieldBoundaryConditions, regularize_immersed_boundary_condition, LeftBoundary, RightBoundary -using Oceananigans.Grids: Grids, Center, Face +using Oceananigans.Grids: Grids, Center, Face, + LeftConnectedRightCenterFolded, LeftConnectedRightFaceFolded, + LeftConnectedRightCenterConnected, LeftConnectedRightFaceConnected using Oceananigans.BoundaryConditions: BoundaryConditions # A tripolar grid is always between 0 and 360 in longitude @@ -19,12 +21,17 @@ sign(::Type{Face}, ::Type{Center}) = - 1 # u-velocity type sign(::Type{Center}, ::Type{Face}) = - 1 # v-velocity type sign(::Type{Center}, ::Type{Center}) = 1 -# Determine the appropriate north fold boundary condition based on grid topology -# TODO: Implement proper topologies for distributed tripolar grids -# and remove the default fallback for AbstractTopology -north_fold_boundary_condition(::Type{<:AbstractTopology}) = UPivotZipperBoundaryCondition # Default fallback for distribtuted to work -north_fold_boundary_condition(::Type{RightCenterFolded}) = UPivotZipperBoundaryCondition -north_fold_boundary_condition(::Type{RightFaceFolded}) = FPivotZipperBoundaryCondition +# Determine the appropriate north fold boundary condition based on grid topology. +# Non-fold topologies (FullyConnected, RightConnected, etc.) default to UPivot — this +# value is only used as a placeholder; these ranks get their north BC overridden by +# inject_halo_communication_boundary_conditions or regularize_field_boundary_conditions. +north_fold_boundary_condition(::Type{<:AbstractTopology}) = UPivotZipperBoundaryCondition +north_fold_boundary_condition(::Type{RightCenterFolded}) = UPivotZipperBoundaryCondition +north_fold_boundary_condition(::Type{LeftConnectedRightCenterFolded}) = UPivotZipperBoundaryCondition +north_fold_boundary_condition(::Type{LeftConnectedRightCenterConnected}) = UPivotZipperBoundaryCondition +north_fold_boundary_condition(::Type{RightFaceFolded}) = FPivotZipperBoundaryCondition +north_fold_boundary_condition(::Type{LeftConnectedRightFaceFolded}) = FPivotZipperBoundaryCondition +north_fold_boundary_condition(::Type{LeftConnectedRightFaceConnected}) = FPivotZipperBoundaryCondition north_fold_boundary_condition(grid::TripolarGridOfSomeKind) = north_fold_boundary_condition(topology(grid, 2)) # a `TripolarGrid` needs a `UPivotZipperBoundaryCondition` for the north boundary diff --git a/src/OrthogonalSphericalShellGrids/tripolar_grid.jl b/src/OrthogonalSphericalShellGrids/tripolar_grid.jl index 38dbc58dfa4..9afae712d18 100644 --- a/src/OrthogonalSphericalShellGrids/tripolar_grid.jl +++ b/src/OrthogonalSphericalShellGrids/tripolar_grid.jl @@ -1,24 +1,37 @@ using Oceananigans.BoundaryConditions: UPivotZipperBoundaryCondition, FPivotZipperBoundaryCondition, NoFluxBoundaryCondition using Oceananigans.Grids: Grids, Bounded, Flat, OrthogonalSphericalShellGrid, Periodic, RectilinearGrid, architecture, cpu_face_constructor_z, validate_dimension_specification, - RightCenterFolded, RightFaceFolded + AbstractTopology, RightCenterFolded, RightFaceFolded using Oceananigans.ImmersedBoundaries: ImmersedBoundaryGrid """ - struct Tripolar{N, F, S} + struct Tripolar{N, F, S, TY<:AbstractTopology} A structure to represent a tripolar grid on an orthogonal spherical shell. +The fold topology `FT` (e.g., `RightCenterFolded` or `RightFaceFolded`) is stored +as a type parameter rather than a field, keeping the struct `isbits` for GPU kernels. """ -struct Tripolar{N, F, S} +struct Tripolar{N, F, S, TY<:AbstractTopology} north_poles_latitude :: N first_pole_longitude :: F southernmost_latitude :: S end -Adapt.adapt_structure(to, t::Tripolar) = +# Getter: returns the fold topology Type +fold_topology(::Tripolar{<:Any, <:Any, <:Any, TY}) where TY = TY + +# Constructor accepting fold topology as a Type argument +Tripolar(n, f, s, ::Type{TY}) where {TY<:AbstractTopology} = + Tripolar{typeof(n), typeof(f), typeof(s), TY}(n, f, s) + +# Backward-compatible constructor (defaults to UPivot) +Tripolar(n, f, s) = Tripolar(n, f, s, RightCenterFolded) + +Adapt.adapt_structure(to, t::Tripolar{<:Any, <:Any, <:Any, TY}) where TY = Tripolar(Adapt.adapt(to, t.north_poles_latitude), Adapt.adapt(to, t.first_pole_longitude), - Adapt.adapt(to, t.southernmost_latitude)) + Adapt.adapt(to, t.southernmost_latitude), + TY) const TripolarGrid{FT, TX, TY, TZ, CZ, CC, FC, CF, FF, Arch} = OrthogonalSphericalShellGrid{FT, TX, TY, TZ, CZ, <:Tripolar, CC, FC, CF, FF, Arch} const TripolarGridOfSomeKind{FT, TX, TY, TZ} = Union{TripolarGrid{FT, TX, TY, TZ}, ImmersedBoundaryGrid{FT, TX, TY, TZ, <:TripolarGrid}} @@ -336,7 +349,7 @@ function TripolarGrid(arch = CPU(), FT::DataType = Oceananigans.defaults.FloatTy on_architecture(arch, map(FT, Azᶜᶠᵃ)), on_architecture(arch, map(FT, Azᶠᶠᵃ)), convert(FT, radius), - Tripolar(north_poles_latitude, first_pole_longitude, southernmost_latitude)) + Tripolar(north_poles_latitude, first_pole_longitude, southernmost_latitude, fold_topology)) return grid end diff --git a/test/distributed_tests_utils.jl b/test/distributed_tests_utils.jl index c034abbc9ff..61dc75b1bed 100644 --- a/test/distributed_tests_utils.jl +++ b/test/distributed_tests_utils.jl @@ -4,8 +4,6 @@ using Oceananigans.DistributedComputations: reconstruct_global_field, reconstruc using Oceananigans.Units using Oceananigans.TimeSteppers: first_time_step! -import Oceananigans.BoundaryConditions: _fill_north_halo! -using Oceananigans.BoundaryConditions: UZBC, CCLocation, FCLocation include("dependencies_for_runtests.jl") @@ -30,45 +28,6 @@ function analytical_immersed_tripolar_grid(underlying_grid::TripolarGrid; radius return grid end -# The serial version of the TripolarGrid substitutes the second half of the last row of the grid. -# This is not done in the distributed version, so we need to undo this substitution if we want to -# compare the results. Otherwise very tiny differences caused by finite precision computations -# will appear in the last row of the grid. - -# tracers or similar fields -@inline _fill_north_halo!(i, k, grid, c, bc::UZBC, ::CCLocation, args...) = my_fold_north_center_center_upivot!(i, k, grid, bc.condition, c) -@inline _fill_north_halo!(i, k, grid, u, bc::UZBC, ::FCLocation, args...) = my_fold_north_face_center_upivot!(i, k, grid, bc.condition, u) - -@inline function my_fold_north_face_center_upivot!(i, k, grid, sign, c) - Nx, Ny, _ = size(grid) - - i′ = Nx - i + 2 # Remember! element Nx + 1 does not exist! - sign = ifelse(i′ > Nx , abs(sign), sign) # for periodic elements we change the sign - i′ = ifelse(i′ > Nx, i′ - Nx, i′) # Periodicity is hardcoded in the x-direction!! - Hy = grid.Hy - - for j = 1 : Hy - @inbounds begin - c[i, Ny + j, k] = sign * c[i′, Ny - j, k] # The Ny line is duplicated so we substitute starting Ny-1 - end - end - - return nothing -end - -@inline function my_fold_north_center_center_upivot!(i, k, grid, sign, c) - Nx, Ny, _ = size(grid) - - i′ = Nx - i + 1 - Hy = grid.Hy - - for j = 1 : Hy - @inbounds c[i, Ny + j, k] = sign * c[i′, Ny - j, k] # The Ny line is duplicated so we substitute starting Ny-1 - end - - return nothing -end - # Run the distributed grid simulation and save down reconstructed results function run_distributed_tripolar_grid(arch, filename) grid = TripolarGrid(arch; size = (40, 40, 1), z = (-1000, 0), halo = (5, 5, 5)) diff --git a/validation/orthogonal_spherical_shell_grid/halo_fill_viz.jl b/validation/orthogonal_spherical_shell_grid/halo_fill_viz.jl index 29f92fddd2e..857e408cc5f 100644 --- a/validation/orthogonal_spherical_shell_grid/halo_fill_viz.jl +++ b/validation/orthogonal_spherical_shell_grid/halo_fill_viz.jl @@ -60,7 +60,7 @@ for fold_topology in fold_topologies color = :red, marker = :star5, markersize = 15, label = "Pivot points" ) # default grid underlay - offset = (fold_topology == RightFaceFolded) ? 1 : 1/2 # how much more grid north of pivots + offset = (fold_topology == RightFaceFolded) ? 0 : 1/2 # how much more grid north of pivots interior_poly_x = pivoti .+ [-Nx ÷ 2, Nx ÷ 2, Nx ÷ 2, -Nx ÷ 2] interior_poly_y = pivotj + offset .+ [0, 0, -Ny, -Ny] poly!(ax, interior_poly_x, interior_poly_y, color = (:black, 0.05), strokecolor = :black, strokewidth = 1) @@ -74,7 +74,8 @@ for fold_topology in fold_topologies radius = sqrt(sum((source .- destination) .^ 2)) / 2 start_angle = atan(destination[2] - origin[2], destination[1] - origin[1]) stop_angle = start_angle - π - scsrc = scatter!(ax, source...; color = Cycled(j), label = "Interior source points") + jcolor = Nyfield + 1 - source[2] + scsrc = scatter!(ax, source...; color = Cycled(jcolor), label = "Interior source points") # scatter!(ax, destination...; marker = :rect, color = Cycled(j), markersize = 20) scdest = scatter!(ax, destination...; marker = :rect, color = :white, markersize = 15, @@ -84,7 +85,7 @@ for fold_topology in fold_topologies # dtriangle = Polygon([Point(0, 0), Point(1, 1), Point(-1, 1)]) scutri = scatter!(ax, destination...; marker = :dtriangle, rotation = stop_angle, color = Cycled(j)) translate!(scutri, 0, 0, 2) # Hack more - ar = arc!(ax, origin, radius, start_angle, stop_angle, color = Cycled(j)) + ar = arc!(ax, origin, radius, start_angle, stop_angle, color = Cycled(jcolor)) translate!(ar, 0, 0, 2) # Hack more # add legend once