does this work on gpu

szy21 · szy21 · commit c98b81f26947 · 2026-01-25T22:13:13.000-08:00
diff --git a/src/prognostic_equations/advection.jl b/src/prognostic_equations/advection.jl
@@ -73,7 +73,8 @@ NVTX.@annotate function horizontal_dynamics_tendency!(Yₜ, Y, p, t)
 
     (; ᶜq_tot_safe) = p.precomputed
     ᶜΦ_r = @. lazy(phi_r(thermo_params, ᶜp))
-    ᶜθ_v = @. lazy(theta_v(thermo_params, ᶜT, ᶜp, ᶜq_tot_safe, ᶜq_liq_rai, ᶜq_ice_sno))
+    ᶜθ_v = p.scratch.ᶜtemp_scalar
+    @. ᶜθ_v = theta_v(thermo_params, ᶜT, ᶜp, ᶜq_tot_safe, ᶜq_liq_rai, ᶜq_ice_sno)
     ᶜθ_vr = @. lazy(theta_vr(thermo_params, ᶜp))
     ᶜΠ = @. lazy(TD.exner_given_pressure(thermo_params, ᶜp))
     ᶜθ_v_diff = @. lazy(ᶜθ_v - ᶜθ_vr)
diff --git a/src/prognostic_equations/implicit/implicit_tendency.jl b/src/prognostic_equations/implicit/implicit_tendency.jl
@@ -196,7 +196,8 @@ function implicit_vertical_advection_tendency!(Yₜ, Y, p, t)
 
     # This is equivalent to grad_v(Φ) + grad_v(p) / ρ
     ᶜΦ_r = @. lazy(phi_r(thermo_params, ᶜp))
-    ᶜθ_v = @. lazy(theta_v(thermo_params, ᶜT, ᶜp, ᶜq_tot_safe, ᶜq_liq_rai, ᶜq_ice_sno))
+    ᶜθ_v = p.scratch.ᶜtemp_scalar
+    @. ᶜθ_v = theta_v(thermo_params, ᶜT, ᶜp, ᶜq_tot_safe, ᶜq_liq_rai, ᶜq_ice_sno)
     ᶜθ_vr = @. lazy(theta_vr(thermo_params, ᶜp))
     ᶜΠ = @. lazy(TD.exner_given_pressure(thermo_params, ᶜp))
     @. Yₜ.f.u₃ -= ᶠgradᵥ_ᶜΦ - ᶠgradᵥ(ᶜΦ_r) +
diff --git a/src/prognostic_equations/implicit/manual_sparse_jacobian.jl b/src/prognostic_equations/implicit/manual_sparse_jacobian.jl
@@ -479,7 +479,7 @@ function update_jacobian!(alg::ManualSparseJacobian, cache, Y, p, dtγ, t)
     ᶠz = Fields.coordinate_field(Y.f).z
     zmax = z_max(axes(Y.f))
 
-    ᶜkappa_m = p.scratch.ᶜtemp_scalar
+    ᶜkappa_m = p.scratch.ᶜtemp_scalar_2
     @. ᶜkappa_m =
         TD.gas_constant_air(thermo_params, ᶜq_tot_safe, ᶜq_liq_rai, ᶜq_ice_sno) /
         TD.cv_m(thermo_params, ᶜq_tot_safe, ᶜq_liq_rai, ᶜq_ice_sno)
@@ -533,7 +533,8 @@ function update_jacobian!(alg::ManualSparseJacobian, cache, Y, p, dtγ, t)
     ∂ᶠu₃_err_∂ᶜρ = matrix[@name(f.u₃), @name(c.ρ)]
     ∂ᶠu₃_err_∂ᶜρe_tot = matrix[@name(f.u₃), @name(c.ρe_tot)]
 
-    ᶜθ_v = @. lazy(theta_v(thermo_params, ᶜT, ᶜp, ᶜq_tot_safe, ᶜq_liq_rai, ᶜq_ice_sno))
+    ᶜθ_v = p.scratch.ᶜtemp_scalar
+    @. ᶜθ_v = theta_v(thermo_params, ᶜT, ᶜp, ᶜq_tot_safe, ᶜq_liq_rai, ᶜq_ice_sno)
     ᶜΠ = @. lazy(TD.exner_given_pressure(thermo_params, ᶜp))
     # In implicit tendency, we use the new pressure-gradient formulation (PGF) and gravitational acceleration: 
     #              grad(p) / ρ + grad(Φ)  =  cp_d * θ_v * grad(Π) + grad(Φ).
diff --git a/test/gpu_horizontal_dynamics_mwe.jl b/test/gpu_horizontal_dynamics_mwe.jl
@@ -0,0 +1,265 @@
+#=
+Minimal Working Example (MWE) to reproduce a GPU issue with the horizontal dynamics
+tendency. It exercises this statement from horizontal_dynamics_tendency! in
+src/prognostic_equations/advection.jl:
+    @. Yₜ.c.uₕ -= C12(
+        gradₕ(ᶜK + ᶜΦ - ᶜΦ_r) +
+        cp_d *
+        (
+            ᶜθ_v_diff * gradₕ(ᶜΠ) + gradₕ(ᶜθ_v_diff * ᶜΠ) - ᶜΠ * gradₕ(ᶜθ_v_diff)
+        ) / 2,
+    )
+
+The issue is that gradₕ is being called with lazy broadcast expressions
+which may not work properly on GPU.
+=#
+
+using ClimaComms
+ClimaComms.@import_required_backends
+using ClimaCore
+using ClimaCore: Geometry, Spaces, Fields, Operators
+import ClimaAtmos: lazy
+import Thermodynamics as TD
+import ClimaParams as CP
+
+# Device returned by ClimaComms.device() (NOTE(review): presumably GPU if available, otherwise CPU — confirm)
+const device = ClimaComms.device()
+const context = ClimaComms.context(device)  # context constructed for that device
+
+# Float type used for the parameters and fields created in this script
+const FT = Float64
+
+# Import necessary submodules
+import ClimaCore.Domains as Domains
+import ClimaCore.Meshes as Meshes
+import ClimaCore.Topologies as Topologies
+import ClimaCore.Quadratures as Quadratures
+
+#=
+Set up a spherical shell with a horizontal spectral element discretization and a vertical
+finite difference discretization. Returns `(center_space, face_space)`.
+=#
+function create_test_space(::Type{FT}; context) where {FT}
+    # Horizontal mesh (cubed sphere)
+    radius = FT(6.371e6)  # Earth radius in meters
+    h_elem = 4            # Number of horizontal elements per edge
+    npoly = 3             # Polynomial degree
+
+    # Create horizontal domain, mesh, topology, and spectral element space
+    h_domain = Domains.SphereDomain(radius)
+    h_mesh = Meshes.EquiangularCubedSphere(h_domain, h_elem)
+    h_topology = Topologies.Topology2D(context, h_mesh)
+    quad = Quadratures.GLL{npoly + 1}()
+    h_space = Spaces.SpectralElementSpace2D(h_topology, quad)
+
+    # Vertical mesh (finite difference)
+    z_elem = 10           # Number of vertical levels
+    z_min = FT(0)
+    z_max = FT(30000)     # 30 km domain height
+
+    z_domain = Domains.IntervalDomain(
+        Geometry.ZPoint(z_min),
+        Geometry.ZPoint(z_max);
+        boundary_names = (:bottom, :top),
+    )
+    z_mesh = Meshes.IntervalMesh(z_domain; nelems = z_elem)
+    z_topology = Topologies.IntervalTopology(context, z_mesh)
+
+    # Only the center column space is needed: face_space is derived from center_space
+    z_space_center = Spaces.CenterFiniteDifferenceSpace(z_topology)
+
+    # Create hybrid 3D spaces
+    center_space = Spaces.ExtrudedFiniteDifferenceSpace(h_space, z_space_center)
+    face_space = Spaces.FaceExtrudedFiniteDifferenceSpace(center_space)
+
+    return center_space, face_space
+end
+
+#=
+Gradient operator instance and vector-type alias used in the tendency expressions below
+=#
+const gradₕ = Operators.Gradient()  # operator instance applied to scalar operands
+const C12 = Geometry.Covariant12Vector  # alias for the Covariant12Vector type
+
+#=
+Return the thermodynamics parameters built from `CP.create_toml_dict(FT)`,
+where `FT` is the script-level float type.
+=#
+function get_thermo_params()
+    return TD.Parameters.ThermodynamicsParameters(CP.create_toml_dict(FT))
+end
+
+#=
+Simplified versions of the reference state functions from refstate_thermodynamics.jl
+=#
+# Reference-state geopotential (Φ_r) evaluated from pressure `p` via the Exner function.
+function phi_r(thermo_params, p)
+    s_ref = 7
+    T_min = TD.Parameters.T_min_ref(thermo_params)
+    ΔT = TD.Parameters.T_surf_ref(thermo_params) - T_min
+    exner = TD.exner_given_pressure(thermo_params, p)
+    cp_d = TD.Parameters.cp_d(thermo_params)
+    return -cp_d * (T_min * log(exner) + ΔT / s_ref * (exner^s_ref - 1))
+end
+
+# θ_v = T * R_m / (Π * R_d), with R_m the gas constant of the given phase partition.
+function theta_v(thermo_params, T, p, q_tot, q_liq, q_ice)
+    q = TD.PhasePartition(q_tot, q_liq, q_ice)
+    denom = TD.exner_given_pressure(thermo_params, p) * TD.Parameters.R_d(thermo_params)
+    return T * TD.gas_constant_air(thermo_params, q) / denom
+end
+
+# Reference temperature: T_min + (T_sfc - T_min) * Π^s_ref, with Π the Exner function.
+function air_temperature_reference(thermo_params, p)
+    s_ref = 7
+    T_min = TD.Parameters.T_min_ref(thermo_params)
+    ΔT = TD.Parameters.T_surf_ref(thermo_params) - T_min
+    return T_min + ΔT * TD.exner_given_pressure(thermo_params, p)^s_ref
+end
+
+# θ_vr: the reference temperature divided by the Exner function at pressure `p`.
+function theta_vr(thermo_params, p)
+    T_ref = air_temperature_reference(thermo_params, p)
+    return T_ref / TD.exner_given_pressure(thermo_params, p)
+end
+
+#=
+Variant 1: lazy operands passed to gradₕ (as in advection.jl, except for its ᶜθ_v)
+=#
+function test_horizontal_dynamics_with_lazy()
+    # Only the center space is used here, so the face space is discarded
+    center_space, _ = create_test_space(FT; context)
+
+    # Get thermodynamic parameters
+    thermo_params = get_thermo_params()
+    cp_d = TD.Parameters.cp_d(thermo_params)
+
+    # Create uniform test fields on center space
+    ᶜK = Fields.ones(FT, center_space) .* FT(100)  # kinetic energy
+    ᶜΦ = Fields.ones(FT, center_space) .* FT(1000)  # geopotential
+    ᶜp = Fields.ones(FT, center_space) .* FT(1e5)  # pressure
+    ᶜT = Fields.ones(FT, center_space) .* FT(280)  # temperature
+    ᶜq_tot_safe = Fields.ones(FT, center_space) .* FT(0.01)
+    ᶜq_liq_rai = Fields.zeros(FT, center_space)
+    ᶜq_ice_sno = Fields.zeros(FT, center_space)
+
+    # Create velocity tendency field (Covariant12Vector)
+    Yₜ_uₕ = fill(Geometry.Covariant12Vector(FT(0), FT(0)), center_space)
+
+    # Build the operands as unmaterialized lazy broadcasts (no intermediate fields)
+    ᶜΦ_r = @. lazy(phi_r(thermo_params, ᶜp))
+    ᶜθ_v = @. lazy(theta_v(thermo_params, ᶜT, ᶜp, ᶜq_tot_safe, ᶜq_liq_rai, ᶜq_ice_sno))
+    ᶜθ_vr = @. lazy(theta_vr(thermo_params, ᶜp))
+    ᶜΠ = @. lazy(TD.exner_given_pressure(thermo_params, ᶜp))
+    ᶜθ_v_diff = @. lazy(ᶜθ_v - ᶜθ_vr)
+
+    # This is the problematic statement (the Yₜ.c.uₕ update in advection.jl)
+    # split form pressure gradient: 0.5 * cp_d * [θv ∇Π + ∇(θv Π) - Π∇θv]
+    @. Yₜ_uₕ -= C12(
+        gradₕ(ᶜK + ᶜΦ - ᶜΦ_r) +
+        cp_d *
+        (
+            ᶜθ_v_diff * gradₕ(ᶜΠ) + gradₕ(ᶜθ_v_diff * ᶜΠ) - ᶜΠ * gradₕ(ᶜθ_v_diff)
+        ) / 2,
+    )
+
+    return nothing
+end
+
+#=
+Variant 2: materialize the intermediate fields first (potential workaround)
+=#
+function test_horizontal_dynamics_with_materialized()
+    # Only the center space is used here, so the face space is discarded
+    center_space, _ = create_test_space(FT; context)
+
+    # Get thermodynamic parameters
+    thermo_params = get_thermo_params()
+    cp_d = TD.Parameters.cp_d(thermo_params)
+
+    # Create uniform test fields on center space
+    ᶜK = Fields.ones(FT, center_space) .* FT(100)
+    ᶜΦ = Fields.ones(FT, center_space) .* FT(1000)
+    ᶜp = Fields.ones(FT, center_space) .* FT(1e5)
+    ᶜT = Fields.ones(FT, center_space) .* FT(280)
+    ᶜq_tot_safe = Fields.ones(FT, center_space) .* FT(0.01)
+    ᶜq_liq_rai = Fields.zeros(FT, center_space)
+    ᶜq_ice_sno = Fields.zeros(FT, center_space)
+
+    # Create velocity tendency field (Covariant12Vector)
+    Yₜ_uₕ = fill(Geometry.Covariant12Vector(FT(0), FT(0)), center_space)
+
+    # Materialize each intermediate into its own field instead of wrapping it in lazy
+    ᶜΦ_r = @. phi_r(thermo_params, ᶜp)
+    ᶜθ_v = @. theta_v(thermo_params, ᶜT, ᶜp, ᶜq_tot_safe, ᶜq_liq_rai, ᶜq_ice_sno)
+    ᶜθ_vr = @. theta_vr(thermo_params, ᶜp)
+    ᶜΠ = @. TD.exner_given_pressure(thermo_params, ᶜp)
+    ᶜθ_v_diff = @. ᶜθ_v - ᶜθ_vr
+
+    # Same statement as the lazy variant; gradₕ still receives fused sums and products
+    @. Yₜ_uₕ -= C12(
+        gradₕ(ᶜK + ᶜΦ - ᶜΦ_r) +
+        cp_d *
+        (
+            ᶜθ_v_diff * gradₕ(ᶜΠ) + gradₕ(ᶜθ_v_diff * ᶜΠ) - ᶜΠ * gradₕ(ᶜθ_v_diff)
+        ) / 2,
+    )
+
+    return nothing
+end
+
+#=
+Variant 3: materialize everything, including the gradₕ inputs
+=#
+function test_horizontal_dynamics_fully_materialized()
+    # Only the center space is used here, so the face space is discarded
+    center_space, _ = create_test_space(FT; context)
+
+    # Get thermodynamic parameters
+    thermo_params = get_thermo_params()
+    cp_d = TD.Parameters.cp_d(thermo_params)
+
+    # Create uniform test fields on center space
+    ᶜK = Fields.ones(FT, center_space) .* FT(100)
+    ᶜΦ = Fields.ones(FT, center_space) .* FT(1000)
+    ᶜp = Fields.ones(FT, center_space) .* FT(1e5)
+    ᶜT = Fields.ones(FT, center_space) .* FT(280)
+    ᶜq_tot_safe = Fields.ones(FT, center_space) .* FT(0.01)
+    ᶜq_liq_rai = Fields.zeros(FT, center_space)
+    ᶜq_ice_sno = Fields.zeros(FT, center_space)
+
+    # Create velocity tendency field (Covariant12Vector)
+    Yₜ_uₕ = fill(Geometry.Covariant12Vector(FT(0), FT(0)), center_space)
+
+    # Materialize all intermediate fields
+    ᶜΦ_r = @. phi_r(thermo_params, ᶜp)
+    ᶜθ_v = @. theta_v(thermo_params, ᶜT, ᶜp, ᶜq_tot_safe, ᶜq_liq_rai, ᶜq_ice_sno)
+    ᶜθ_vr = @. theta_vr(thermo_params, ᶜp)
+    ᶜΠ = @. TD.exner_given_pressure(thermo_params, ᶜp)
+    ᶜθ_v_diff = @. ᶜθ_v - ᶜθ_vr
+
+    # Also materialize the compound gradₕ inputs, so every gradₕ operand is a plain field
+    ᶜscalar_for_grad1 = @. ᶜK + ᶜΦ - ᶜΦ_r
+    ᶜscalar_for_grad2 = @. ᶜθ_v_diff * ᶜΠ
+
+    # Use fully materialized fields
+    @. Yₜ_uₕ -= C12(
+        gradₕ(ᶜscalar_for_grad1) +
+        cp_d *
+        (
+            ᶜθ_v_diff * gradₕ(ᶜΠ) + gradₕ(ᶜscalar_for_grad2) - ᶜΠ * gradₕ(ᶜθ_v_diff)
+        ) / 2,
+    )
+
+    return nothing
+end
+
+# Run the tests in order; an exception in one variant stops the script at that call.
+foreach(
+    run_variant -> run_variant(),
+    (
+        test_horizontal_dynamics_with_lazy,           # Test 1: lazy expressions
+        test_horizontal_dynamics_with_materialized,   # Test 2: materialized intermediates
+        test_horizontal_dynamics_fully_materialized,  # Test 3: also materialize gradₕ inputs
+    ),
+)