CliMA
diff --git a/‎docs/src/remapping.md‎
Lines changed: 11 additions & 2 deletions b/‎docs/src/remapping.md‎
Lines changed: 11 additions & 2 deletions
diff --git a/‎examples/remap_visualization.jl‎
Lines changed: 186 additions & 0 deletions b/‎examples/remap_visualization.jl‎
Lines changed: 186 additions & 0 deletions
diff --git a/‎ext/cuda/remapping_distributed.jl‎
Lines changed: 155 additions & 0 deletions b/‎ext/cuda/remapping_distributed.jl‎
Lines changed: 155 additions & 0 deletions
@@ -45,8 +45,12 @@ given `field`. To obtain such coordinates, you can call the
 functions. These functions return an `Array` with the coordinates over which
 interpolation will occur. These arrays are of type `Geometry.Point`s.
 
-By default, vertical interpolation is switched off and the `field` is evaluated
-directly on the levels.
+By default, vertical interpolation is off (field evaluated on levels). Horizontal
+interpolation: `:spectral` (default; uses spectral element quadrature weights) or `:bilinear`:
+
+```julia
+interpolated_array = Remapping.interpolate(field; horizontal_method = :bilinear)
+```
 
 `ClimaCore.Remapping.interpolate` allocates new output arrays. As such, it is
 not suitable for performance-critical applications.
@@ -76,9 +80,14 @@ hcoords = [Geometry.LatLongPoint(lat, long) for long in longpts, lat in latpts]
 zcoords = [Geometry.ZPoint(z) for z in zpts]
 
 interpolated_array = interpolate(field, hcoords, zcoords)
+# Bilinear: interpolate(field, hcoords, zcoords; horizontal_method = :bilinear)
 ```
 The output is defined on the Cartesian product of `hcoords` with `zcoords`.
 
+#### Diagnostics and NetCDF writers
+
+Pass `horizontal_method` through when remapping for output (e.g. `Remapping.interpolate(..., horizontal_method = :bilinear)` or `Remapper(..., horizontal_method = :bilinear)`).
+
 If the default target coordinates are being used, it is possible to broadcast
 `ClimaCore.Geometry.components` to extract them as a vector of tuples (and then
 broadcast `getindex` to extract the respective coordinates as vectors).
 
@@ -0,0 +1,186 @@
+# Visualize bilinear and spectral interpolation remapping: slotted cylinder (Zalesak)
+#
+# Compares bilinear vs spectral horizontal remap on the slotted cylinder test case
+# (disk with rectangular slot; f ∈ {0, 1}). Parameters highlight spectral (Lagrange)
+# overshoot/undershoot vs bilinear.
+#
+# Run from repo root with:
+#   julia --project=examples examples/remap_visualization.jl
+# or with the main project:
+#   julia --project=. examples/remap_visualization.jl
+
+using ClimaComms
+using ClimaCore:
+    Geometry,
+    Domains,
+    Meshes,
+    Topologies,
+    Spaces,
+    Fields,
+    Remapping,
+    Quadratures
+using CairoMakie
+
+device = ClimaComms.CPUSingleThreaded()
+
+nelements_horz = 6   # horizontal elements per dimension
+Nq = 4               # GLL points per dimension
+n_interp = 24        # target grid resolution for interpolation
+
+# Slotted cylinder (Zalesak): disk with rectangular slot; f ∈ {0, 1}
+slot_radius = 0.15
+slot_cx, slot_cy = 0.5, 0.5
+slot_half_width = 0.025
+slot_y_hi = slot_cy + slot_radius
+
+# --- Domain: square [0, 1] × [0, 1] (periodic) ---
+horzdomain = Domains.RectangleDomain(
+    Geometry.XPoint(0.0) .. Geometry.XPoint(1.0),
+    Geometry.YPoint(0.0) .. Geometry.YPoint(1.0),
+    x1periodic = true,
+    x2periodic = true,
+)
+
+# --- Vertical: single layer ---
+vertdomain = Domains.IntervalDomain(
+    Geometry.ZPoint(0.0),
+    Geometry.ZPoint(1.0);
+    boundary_names = (:bottom, :top),
+)
+vertmesh = Meshes.IntervalMesh(vertdomain, nelems = 1)
+verttopo = Topologies.IntervalTopology(ClimaComms.SingletonCommsContext(device), vertmesh)
+vert_center_space = Spaces.CenterFiniteDifferenceSpace(verttopo)
+
+# --- Horizontal: spectral elements ---
+quad = Quadratures.GLL{Nq}()
+horzmesh = Meshes.RectilinearMesh(horzdomain, nelements_horz, nelements_horz)
+horztopology = Topologies.Topology2D(ClimaComms.SingletonCommsContext(device), horzmesh)
+horzspace = Spaces.SpectralElementSpace2D(horztopology, quad)
+hv_center_space = Spaces.ExtrudedFiniteDifferenceSpace(horzspace, vert_center_space)
+
+# --- Slotted cylinder field ---
+coords = Fields.coordinate_field(hv_center_space)
+function slotted_cylinder(x, y)
+    in_disk = (x - slot_cx)^2 + (y - slot_cy)^2 <= slot_radius^2
+    in_slot = (abs(x - slot_cx) <= slot_half_width) && (y >= slot_cy) && (y <= slot_y_hi)
+    return (in_disk && !in_slot) ? 1.0 : 0.0
+end
+field = @. slotted_cylinder(coords.x, coords.y)
+Spaces.weighted_dss!(field)
+
+# --- Target grid: uniform n_interp×n_interp, single vertical level ---
+xpts = range(Geometry.XPoint(0.0), Geometry.XPoint(1.0), length = n_interp)
+ypts = range(Geometry.YPoint(0.0), Geometry.YPoint(1.0), length = n_interp)
+zpts = range(Geometry.ZPoint(0.5), Geometry.ZPoint(0.5), length = 1)
+
+# --- Interpolate: bilinear and spectral ---
+interp_bilinear =
+    Remapping.interpolate_array(field, xpts, ypts, zpts; horizontal_method = :bilinear)
+interp_spectral =
+    Remapping.interpolate_array(field, xpts, ypts, zpts; horizontal_method = :spectral)
+interp_bilinear_2d = interp_bilinear[:, :, 1]
+interp_spectral_2d = interp_spectral[:, :, 1]
+err_bilinear_spectral = interp_bilinear_2d .- interp_spectral_2d
+
+# --- Non-negativity stats (source f ∈ {0, 1}) ---
+min_bilinear, max_bilinear = extrema(interp_bilinear_2d)
+min_spectral, max_spectral = extrema(interp_spectral_2d)
+n_neg = count(<(0), interp_spectral_2d)
+n_gt1 = count(>(1), interp_spectral_2d)
+@info "Slotted cylinder: non-negativity (source f ∈ {0,1})" bilinear_min = min_bilinear bilinear_max =
+    max_bilinear spectral_min = min_spectral spectral_max = max_spectral spectral_below_0 =
+    n_neg spectral_above_1 = n_gt1
+
+# --- Raw spectral element grid (GLL nodes, v=1) ---
+x_se = Float64[]
+y_se = Float64[]
+vals_se = Float64[]
+Fields.byslab(hv_center_space) do slabidx
+    slabidx.v == 1 || return
+    x_data = parent(Fields.slab(coords.x, slabidx))
+    y_data = parent(Fields.slab(coords.y, slabidx))
+    f_data = parent(Fields.slab(field, slabidx))
+    for j in 1:Nq, i in 1:Nq
+        push!(x_se, x_data[i, j, 1])
+        push!(y_se, y_data[i, j, 1])
+        push!(vals_se, f_data[i, j, 1])
+    end
+end
+
+x_plot = [p.x for p in xpts]
+y_plot = [p.y for p in ypts]
+boundary_pos = (0:nelements_horz) ./ nelements_horz
+
+# --- Figure: bilinear | spectral | error; row 2 = raw GLL nodes ---
+fig = Figure(size = (1200, 800))
+
+ax1 = Axis(fig[1, 1], title = "Bilinear ($n_interp×$n_interp)", xlabel = "x", ylabel = "y")
+hm1 = heatmap!(
+    ax1,
+    x_plot,
+    y_plot,
+    interp_bilinear_2d';
+    colorrange = (0, 1),
+    colormap = :viridis,
+    lowclip = :orange,
+    highclip = :red,
+)
+Colorbar(fig[1, 2], hm1; label = "value")
+
+ax2 = Axis(fig[1, 3], title = "Spectral ($n_interp×$n_interp)", xlabel = "x", ylabel = "y")
+hm2 = heatmap!(
+    ax2,
+    x_plot,
+    y_plot,
+    interp_spectral_2d';
+    colorrange = (0, 1),
+    colormap = :viridis,
+    lowclip = :orange,
+    highclip = :red,
+)
+Colorbar(fig[1, 4], hm2; label = "value")
+
+ax3 = Axis(
+    fig[1, 5],
+    title = "Error (bilinear − spectral)",
+    xlabel = "x",
+    ylabel = "y",
+)
+erange = extrema(err_bilinear_spectral)
+hm3 = heatmap!(
+    ax3,
+    x_plot,
+    y_plot,
+    err_bilinear_spectral';
+    colorrange = erange,
+    colormap = :RdBu,
+)
+Colorbar(fig[1, 6], hm3; label = "error")
+
+ax_se = Axis(
+    fig[2, 1],
+    title = "Raw spectral element grid (GLL nodes)",
+    xlabel = "x",
+    ylabel = "y",
+)
+sc_se = scatter!(
+    ax_se,
+    y_se,
+    x_se;
+    color = vals_se,
+    colorrange = (0, 1),
+    colormap = :viridis,
+    lowclip = :orange,
+    highclip = :red,
+    markersize = 8,
+)
+vlines!(ax_se, boundary_pos; color = :pink, linewidth = 2)
+hlines!(ax_se, boundary_pos; color = :pink, linewidth = 2)
+limits!(ax_se, 0, 1, 0, 1)
+Colorbar(fig[2, 2], sc_se; label = "value")
+
+outdir = joinpath(@__DIR__, "output")
+mkpath(outdir)
+outpath = joinpath(outdir, "remap_slotted_cylinder_$(n_interp)x$(n_interp).png")
+save(outpath, fig)
+@info "Saved to $outpath"
@@ -4,6 +4,161 @@ import CUDA
 using CUDA: @cuda
 import ClimaCore.Remapping: _set_interpolated_values_device!
 
+# Bilinear in (s,t) ∈ [0,1]² (local to 2-point cell; reference element is [-1,1]²):
+# (1-s)(1-t)*c11 + s*(1-t)*c21 + (1-s)*t*c12 + s*t*c22
+@inline _bilinear(c11, c21, c22, c12, s, t) =
+    (1 - s) * (1 - t) * c11 + s * (1 - t) * c21 + (1 - s) * t * c12 + s * t * c22
+
+# Bilinear path on GPU: pure GPU kernels (no scalar indexing).
+# 2-point cell (i..i+1, j..j+1) containing (ξ1, ξ2); interpolate between nodes within the element.
+function ClimaCore.Remapping._set_interpolated_values_bilinear!(
+    out::CUDA.CuArray,
+    fields::AbstractArray{<:Fields.Field},
+    scratch_corners,
+    local_horiz_indices,
+    vert_interpolation_weights::AbstractArray,
+    vert_bounding_indices::AbstractArray,
+    local_bilinear_s,
+    local_bilinear_t,
+    local_bilinear_i,
+    local_bilinear_j,
+)
+    field_values = tuple(map(f -> Fields.field_values(f), fields)...)
+    num_horiz = length(local_horiz_indices)
+    num_vert = length(vert_bounding_indices)
+    num_fields = length(field_values)
+    nitems = length(out)
+    args = (
+        out,
+        local_horiz_indices,
+        local_bilinear_s,
+        local_bilinear_t,
+        local_bilinear_i,
+        local_bilinear_j,
+        vert_interpolation_weights,
+        vert_bounding_indices,
+        field_values,
+    )
+    threads = threads_via_occupancy(set_interpolated_values_bilinear_3d_kernel!, args)
+    p = linear_partition(nitems, threads)
+    auto_launch!(
+        set_interpolated_values_bilinear_3d_kernel!,
+        args;
+        threads_s = (p.threads,),
+        blocks_s = (p.blocks,),
+    )
+end
+
+function set_interpolated_values_bilinear_3d_kernel!(
+    out,
+    local_horiz_indices,
+    local_bilinear_s,
+    local_bilinear_t,
+    local_bilinear_i,
+    local_bilinear_j,
+    vert_interpolation_weights,
+    vert_bounding_indices,
+    field_values,
+)
+    num_horiz = length(local_horiz_indices)
+    num_vert = length(vert_bounding_indices)
+    num_fields = length(field_values)
+    inds = (num_horiz, num_vert, num_fields)
+    i_thread =
+        (CUDA.blockIdx().x - Int32(1)) * CUDA.blockDim().x + CUDA.threadIdx().x
+    1 ≤ i_thread ≤ prod(inds) || return nothing
+    (i_out, j_v, k) = CartesianIndices(map(x -> Base.OneTo(x), inds))[i_thread].I
+    @inbounds begin
+        CI = CartesianIndex
+        h = local_horiz_indices[i_out]
+        v_lo, v_hi = vert_bounding_indices[j_v]
+        A, B = vert_interpolation_weights[j_v]
+        s = local_bilinear_s[i_out]
+        t = local_bilinear_t[i_out]
+        ii = local_bilinear_i[i_out]
+        jj = local_bilinear_j[i_out]
+        fvals = field_values[k]
+        # Four nodes of 2-point cell: (ii,jj), (ii+1,jj), (ii+1,jj+1), (ii,jj+1)
+        c11 = A * fvals[CI(ii, jj, 1, v_lo, h)] + B * fvals[CI(ii, jj, 1, v_hi, h)]
+        c21 = A * fvals[CI(ii + 1, jj, 1, v_lo, h)] + B * fvals[CI(ii + 1, jj, 1, v_hi, h)]
+        c22 =
+            A * fvals[CI(ii + 1, jj + 1, 1, v_lo, h)] +
+            B * fvals[CI(ii + 1, jj + 1, 1, v_hi, h)]
+        c12 = A * fvals[CI(ii, jj + 1, 1, v_lo, h)] + B * fvals[CI(ii, jj + 1, 1, v_hi, h)]
+        out[i_out, j_v, k] = _bilinear(c11, c21, c22, c12, s, t)
+    end
+    return nothing
+end
+
+function ClimaCore.Remapping._set_interpolated_values_bilinear!(
+    out::CUDA.CuArray,
+    fields::AbstractArray{<:Fields.Field},
+    scratch_corners,
+    local_horiz_indices,
+    ::Nothing,
+    ::Nothing,
+    local_bilinear_s,
+    local_bilinear_t,
+    local_bilinear_i,
+    local_bilinear_j,
+)
+    field_values = tuple(map(f -> Fields.field_values(f), fields)...)
+    num_horiz = length(local_horiz_indices)
+    num_fields = length(field_values)
+    nitems = length(out)
+    args = (
+        out,
+        local_horiz_indices,
+        local_bilinear_s,
+        local_bilinear_t,
+        local_bilinear_i,
+        local_bilinear_j,
+        field_values,
+    )
+    threads = threads_via_occupancy(set_interpolated_values_bilinear_2d_kernel!, args)
+    p = linear_partition(nitems, threads)
+    auto_launch!(
+        set_interpolated_values_bilinear_2d_kernel!,
+        args;
+        threads_s = (p.threads,),
+        blocks_s = (p.blocks,),
+    )
+end
+
+function set_interpolated_values_bilinear_2d_kernel!(
+    out,
+    local_horiz_indices,
+    local_bilinear_s,
+    local_bilinear_t,
+    local_bilinear_i,
+    local_bilinear_j,
+    field_values,
+)
+    num_horiz = length(local_horiz_indices)
+    num_fields = length(field_values)
+    inds = (num_horiz, num_fields)
+    i_thread =
+        (CUDA.blockIdx().x - Int32(1)) * CUDA.blockDim().x + CUDA.threadIdx().x
+    1 ≤ i_thread ≤ prod(inds) || return nothing
+    (i_out, k) = CartesianIndices(map(x -> Base.OneTo(x), inds))[i_thread].I
+    @inbounds begin
+        CI = CartesianIndex
+        h = local_horiz_indices[i_out]
+        s = local_bilinear_s[i_out]
+        t = local_bilinear_t[i_out]
+        ii = local_bilinear_i[i_out]
+        jj = local_bilinear_j[i_out]
+        fvals = field_values[k]
+        # Four nodes of 2-point cell: (ii,jj), (ii+1,jj), (ii+1,jj+1), (ii,jj+1)
+        c11 = fvals[CI(ii, jj, 1, 1, h)]
+        c21 = fvals[CI(ii + 1, jj, 1, 1, h)]
+        c22 = fvals[CI(ii + 1, jj + 1, 1, 1, h)]
+        c12 = fvals[CI(ii, jj + 1, 1, 1, h)]
+        out[i_out, k] = _bilinear(c11, c21, c22, c12, s, t)
+    end
+    return nothing
+end
+
 
 function _set_interpolated_values_device!(
     out::AbstractArray,