Reformulate IMEX ARK to correctly account for Newton residuals and DSS

dennisYatunin · dennisYatunin · commit b890aaacbfce · 2024-03-22T18:36:25.000-07:00
diff --git a/src/ClimaTimeSteppers.jl b/src/ClimaTimeSteppers.jl
@@ -59,6 +59,7 @@ array_device(x) = CUDADevice() # assume CUDA
 
 import DiffEqBase, SciMLBase, LinearAlgebra, DiffEqCallbacks, Krylov
 
+include(joinpath("utilities", "sparse_tuple.jl"))
 include(joinpath("utilities", "sparse_coeffs.jl"))
 include(joinpath("utilities", "fused_increment.jl"))
 include("sparse_containers.jl")
@@ -118,7 +119,7 @@ const SPCO = SparseCoeffs
 
 include("solvers/imex_tableaus.jl")
 include("solvers/explicit_tableaus.jl")
-include("solvers/imex_ark.jl")
+include("solvers/imex_ark_new.jl")
 include("solvers/imex_ssprk.jl")
 include("solvers/multirate.jl")
 include("solvers/lsrk.jl")
diff --git a/src/solvers/imex_ark_new.jl b/src/solvers/imex_ark_new.jl
@@ -0,0 +1,264 @@
+"""
+    has_jac(T_imp!)
+
+Return `true` when `T_imp!` has both a `Wfact` field and a `jac_prototype`
+field and neither of them is `nothing`; return `false` otherwise.
+"""
+function has_jac(T_imp!)
+    F = typeof(T_imp!)
+    # Check for the fields first so that objects without them (e.g., plain
+    # functions) are never accessed through `getproperty`.
+    (hasfield(F, :Wfact) && hasfield(F, :jac_prototype)) || return false
+    return !(isnothing(T_imp!.Wfact) || isnothing(T_imp!.jac_prototype))
+end
+
+"""
+    imp_error(name)
+
+Throw an error stating that a `NewtonsMethod` is required for the tableau
+called `name` (or for "The given IMEXTableau" when `name` is `nothing`).
+"""
+function imp_error(name)
+    subject = isnothing(name) ? "The given IMEXTableau" : name
+    return error("$(subject) \
+                  has implicit stages that require a nonlinear solver, \
+                  so NewtonsMethod must be specified alongside T_imp!.")
+end
+
+"""
+    sdirk_error(name)
+
+Throw an error stating that the tableau called `name` (or "The given
+IMEXTableau" when `name` is `nothing`) is not SDIRK, so the `NewTimeStep`
+update signal must not be used with it.
+"""
+function sdirk_error(name)
+    unnamed = isnothing(name)
+    subject = unnamed ? "The given IMEXTableau" : name
+    object = unnamed ? "this tableau" : name
+    return error("$(subject) \
+                  has implicit stages with distinct coefficients (it \
+                  is not SDIRK), and an update is required whenever a \
+                  stage has a different coefficient from the previous \
+                  stage. Do not update on the NewTimeStep signal when \
+                  using $(object).")
+end
+
+"""
+    IMEXARKCache(stage_cache, newtons_method_cache)
+
+Container for the data that `init_cache` allocates for the IMEX ARK solver:
+the per-stage storage and coefficients (`stage_cache`) and the cache of the
+nonlinear solver (`newtons_method_cache`, which `init_cache` sets to `nothing`
+when no `newtons_method` is given).
+"""
+struct IMEXARKCache{StageCache, NewtonCache}
+    stage_cache::StageCache
+    newtons_method_cache::NewtonCache
+end
+
+"""
+    init_cache(prob, alg::IMEXAlgorithm{Unconstrained}; kwargs...)
+
+Allocate the `IMEXARKCache` that `step_u!` uses to advance `prob` with the
+IMEX ARK tableau stored in `alg.tableau`.
+
+The final state is treated as stage `s + 1`, whose coefficients are the rows
+`b_exp'` and `b_imp'` appended to `a_exp` and `a_imp`. Stages are split into
+those whose diagonal entry in `a_imp` is zero (no implicit solve) and those
+whose entry is nonzero (implicit solve). Storage is only allocated for the
+tendencies and implicit increments that have a nonzero coefficient in some
+stage, and only for the tendency functions that are not `nothing`.
+
+Calls `imp_error` when the tableau has stages with implicit solves and
+`T_imp!` is given, but `alg.newtons_method` is `nothing`.
+"""
+function init_cache(prob::DiffEqBase.AbstractODEProblem, alg::IMEXAlgorithm{Unconstrained}; kwargs...)
+    (; u0) = prob
+    (; T_lim!, T_exp!, T_exp_T_lim!, T_imp!) = prob.f
+    (; name, newtons_method) = alg
+    (; a_exp, b_exp, a_imp, b_imp) = alg.tableau
+
+    no_T_lim = isnothing(T_lim!) && isnothing(T_exp_T_lim!)
+    no_T_exp = isnothing(T_exp!) && isnothing(T_exp_T_lim!)
+    no_T_imp = isnothing(T_imp!)
+
+    # `length` does not accept a dimension argument, so use `size` here.
+    s = size(a_imp, 1) # number of stages
+
+    A_exp = vcat(a_exp, b_exp') # exp coefs with final state seen as stage s + 1
+    A_imp = vcat(a_imp, b_imp') # imp coefs with final state seen as stage s + 1
+
+    # The stage partition must be computed before Γ, which is indexed by it.
+    z_stages = findall(iszero, diag(a_imp)) # stages without implicit solves
+    nz_stages = findall(!iszero, diag(a_imp)) # stages with implicit solves
+    Γ = a_imp[nz_stages, nz_stages] # "fully implicit" part of a_imp
+    γs = diag(Γ)
+    sdirk_γ = length(unique(γs)) == 1 ? γs[1] : nothing
+
+    exp_stages = findall(col -> any(!iszero, col), eachcol(A_exp))
+    # `findall` returns positions within the `z_stages` columns, so map them
+    # back to stage indices (the same indexing that is used for `exp_stages`).
+    imp_z_stages = z_stages[findall(col -> any(!iszero, col), eachcol(A_imp[:, z_stages]))]
+
+    temp_value1 = similar(u0)
+    temp_value2 = similar(u0)
+    T_lim_by_stage = no_T_lim ? SparseTuple() : SparseTuple(map(_ -> similar(u0), exp_stages), exp_stages)
+    T_exp_by_stage = no_T_exp ? SparseTuple() : SparseTuple(map(_ -> similar(u0), exp_stages), exp_stages)
+    T_imp_by_stage = no_T_imp ? SparseTuple() : SparseTuple(map(_ -> similar(u0), imp_z_stages), imp_z_stages)
+    ΔU_imp_by_stage = no_T_imp ? SparseTuple() : SparseTuple(map(_ -> similar(u0), nz_stages), nz_stages)
+
+    isnothing(newtons_method) && !isempty(ΔU_imp_by_stage) && imp_error(name)
+
+    prev_ΔtT_lim_to_Δu_coefs_by_stage = no_T_lim ? SparseTuple() : sparse_matrix_rows(A_exp, 1:(s + 1), 1:s)
+    prev_ΔtT_exp_to_Δu_coefs_by_stage = no_T_exp ? SparseTuple() : sparse_matrix_rows(A_exp, 1:(s + 1), 1:s)
+    prev_z_ΔtT_imp_to_Δu_coefs_by_stage =
+        no_T_imp ? SparseTuple() : sparse_matrix_rows(A_imp[:, z_stages], 1:(s + 1), z_stages)
+
+    # The columns of this matrix are already restricted to `nz_stages`, so only
+    # its rows are indexed by `nz_stages` when the diagonal of Γ is removed.
+    prev_nz_ΔtT_imp_to_Δu_coef_matrix = A_imp[:, nz_stages]
+    prev_nz_ΔtT_imp_to_Δu_coef_matrix[nz_stages, :] .= Γ - Diagonal(Γ)
+    prev_nz_ΔU_imp_to_Δu_coefs_by_stage =
+        no_T_imp ? SparseTuple() : sparse_matrix_rows(prev_nz_ΔtT_imp_to_Δu_coef_matrix * inv(Γ), 1:(s + 1), nz_stages)
+
+    # Convert all inputs to unrolled_foreach in step_u! into regular tuples.
+    # NOTE(review): step_u! reads entry `s + 1` of the four coefficient tuples
+    # below; if the second argument of dense_tuple is the tuple length, these
+    # calls need `s + 1` instead of `s` — confirm against dense_tuple.
+    T_lim_by_stage_dense = dense_tuple(T_lim_by_stage, s, nothing)
+    T_exp_by_stage_dense = dense_tuple(T_exp_by_stage, s, nothing)
+    T_imp_by_stage_dense = dense_tuple(T_imp_by_stage, s, nothing)
+    ΔU_imp_by_stage_dense = dense_tuple(ΔU_imp_by_stage, s, nothing)
+    prev_ΔtT_lim_to_Δu_coefs_by_stage_dense = dense_tuple(prev_ΔtT_lim_to_Δu_coefs_by_stage, s, SparseTuple())
+    prev_ΔtT_exp_to_Δu_coefs_by_stage_dense = dense_tuple(prev_ΔtT_exp_to_Δu_coefs_by_stage, s, SparseTuple())
+    prev_z_ΔtT_imp_to_Δu_coefs_by_stage_dense = dense_tuple(prev_z_ΔtT_imp_to_Δu_coefs_by_stage, s, SparseTuple())
+    prev_nz_ΔU_imp_to_Δu_coefs_by_stage_dense = dense_tuple(prev_nz_ΔU_imp_to_Δu_coefs_by_stage, s, SparseTuple())
+
+    stage_cache = (;
+        s,
+        sdirk_γ,
+        temp_value1,
+        temp_value2,
+        T_lim_by_stage,
+        T_exp_by_stage,
+        T_imp_by_stage,
+        ΔU_imp_by_stage,
+        T_lim_by_stage_dense,
+        T_exp_by_stage_dense,
+        T_imp_by_stage_dense,
+        ΔU_imp_by_stage_dense,
+        prev_ΔtT_lim_to_Δu_coefs_by_stage_dense,
+        prev_ΔtT_exp_to_Δu_coefs_by_stage_dense,
+        prev_z_ΔtT_imp_to_Δu_coefs_by_stage_dense,
+        prev_nz_ΔU_imp_to_Δu_coefs_by_stage_dense,
+    )
+
+    # The Jacobian prototype is only forwarded when T_imp! provides one.
+    newtons_method_cache =
+        isnothing(newtons_method) ? nothing :
+        allocate_cache(newtons_method, u0, has_jac(T_imp!) ? T_imp!.jac_prototype : nothing)
+
+    return IMEXARKCache(stage_cache, newtons_method_cache)
+end
+
+"""
+    step_u!(integrator, cache::IMEXARKCache)
+
+Advance `integrator.u` in place from time `integrator.t` by one step of size
+`integrator.dt` using the IMEX ARK tableau in `integrator.alg.tableau`, and
+return the updated `u`.
+
+For every stage, the stage value is assembled from the limited, explicit and
+implicit contributions of the previous stages, `post_explicit!` is called on
+it, and, if the stage has an implicit increment buffer, the implicit equation
+is solved with `solve_newton!`. The tendencies needed by later stages are then
+evaluated, with `dss!` applied to the limited and explicit tendencies. The
+final state is assembled in the same way from the row of coefficients stored
+at index `s + 1`, followed by `dss!` and `post_explicit!`.
+
+Calls `sdirk_error` if the Jacobian is updated on the `NewTimeStep` signal and
+the cache does not hold a single implicit coefficient (`sdirk_γ` is `nothing`).
+"""
+function step_u!(integrator, cache::IMEXARKCache)
+    (; u, p, t, alg) = integrator
+    (; T_lim!, T_exp!, T_exp_T_lim!, T_imp!) = integrator.sol.prob.f
+    (; lim!, dss!, post_explicit!, post_implicit!) = integrator.sol.prob.f
+    (; name, newtons_method) = alg
+    (; a_imp, c_exp, c_imp) = alg.tableau
+    (; newtons_method_cache) = cache
+    (;
+        s,
+        sdirk_γ,
+        temp_value1,
+        temp_value2,
+        T_lim_by_stage,
+        T_exp_by_stage,
+        T_imp_by_stage,
+        ΔU_imp_by_stage,
+        T_lim_by_stage_dense,
+        T_exp_by_stage_dense,
+        T_imp_by_stage_dense,
+        ΔU_imp_by_stage_dense,
+        prev_ΔtT_lim_to_Δu_coefs_by_stage_dense,
+        prev_ΔtT_exp_to_Δu_coefs_by_stage_dense,
+        prev_z_ΔtT_imp_to_Δu_coefs_by_stage_dense,
+        prev_nz_ΔU_imp_to_Δu_coefs_by_stage_dense,
+    ) = cache.stage_cache
+
+    Δt = integrator.dt
+
+    # Update the Jacobian once per step if the update signal requests it. This
+    # is only possible when all implicit stages share one coefficient.
+    if !isnothing(T_imp!) && !isnothing(newtons_method)
+        (; update_j) = newtons_method
+        (; j) = newtons_method_cache
+        if !isnothing(j) && needs_update!(update_j, NewTimeStep(t))
+            isnothing(sdirk_γ) && sdirk_error(name)
+            T_imp!.Wfact(j, u, p, Δt * sdirk_γ, t)
+        end
+    end
+
+    unrolled_foreach(
+        1:s,
+        T_lim_by_stage_dense[1:s],
+        T_exp_by_stage_dense[1:s],
+        T_imp_by_stage_dense[1:s],
+        ΔU_imp_by_stage_dense[1:s],
+        prev_ΔtT_lim_to_Δu_coefs_by_stage_dense[1:s],
+        prev_ΔtT_exp_to_Δu_coefs_by_stage_dense[1:s],
+        prev_z_ΔtT_imp_to_Δu_coefs_by_stage_dense[1:s],
+        prev_nz_ΔU_imp_to_Δu_coefs_by_stage_dense[1:s],
+    ) do (
+        stage,
+        T_lim,
+        T_exp,
+        T_imp,
+        ΔU_imp,
+        prev_ΔtT_lim_to_Δu_coefs,
+        prev_ΔtT_exp_to_Δu_coefs,
+        prev_z_ΔtT_imp_to_Δu_coefs,
+        prev_nz_ΔU_imp_to_Δu_coefs,
+    )
+        t_exp = t + Δt * c_exp[stage]
+        t_imp = t + Δt * c_imp[stage]
+        Δtγ = Δt * a_imp[stage, stage]
+
+        Δu_over_Δt_from_prev_T_lim = broadcasted_dot(prev_ΔtT_lim_to_Δu_coefs, T_lim_by_stage)
+        Δu_over_Δt_from_prev_T_exp = broadcasted_dot(prev_ΔtT_exp_to_Δu_coefs, T_exp_by_stage)
+        Δu_over_Δt_from_prev_T_imp = broadcasted_dot(prev_z_ΔtT_imp_to_Δu_coefs, T_imp_by_stage)
+        Δu_from_prev_ΔU_imp = broadcasted_dot(prev_nz_ΔU_imp_to_Δu_coefs, ΔU_imp_by_stage)
+
+        # Only the limited increment is passed through the limiter.
+        if isempty(prev_ΔtT_lim_to_Δu_coefs)
+            u_plus_Δu_lim = u
+        else
+            u_plus_Δu_lim = temp_value1
+            @. u_plus_Δu_lim = u + Δt * Δu_over_Δt_from_prev_T_lim
+            lim!(u_plus_Δu_lim, p, t_exp, u)
+        end
+
+        if (
+            isempty(prev_ΔtT_exp_to_Δu_coefs) &&
+            isempty(prev_z_ΔtT_imp_to_Δu_coefs) &&
+            isempty(prev_nz_ΔU_imp_to_Δu_coefs)
+        )
+            U_before_solve = u_plus_Δu_lim
+        else
+            U_before_solve = temp_value2
+            @. U_before_solve =
+                u_plus_Δu_lim + Δt * (Δu_over_Δt_from_prev_T_exp + Δu_over_Δt_from_prev_T_imp) + Δu_from_prev_ΔU_imp
+        end
+
+        post_explicit!(U_before_solve, p, t_imp)
+
+        if !isnothing(ΔU_imp)
+            U = ΔU_imp # Use ΔU_imp as additional temporary storage.
+            @. U = U_before_solve
+
+            # Solve U ≈ U_before_solve + Δtγ * T_imp(U, p, t_imp) for U.
+            set_residual! = (residual, U) -> begin
+                T_imp!(residual, U, p, t_imp)
+                @. residual = U_before_solve - U + Δtγ * residual
+            end
+            set_jacobian! = (jacobian, U) -> T_imp!.Wfact(jacobian, U, p, Δtγ, t_imp)
+            # Use a new name for the wrapper: assigning to `post_implicit!`
+            # here would rebind the captured outer variable, so the wrapper
+            # would call itself with three arguments instead of the callback.
+            call_post_implicit! = U -> post_implicit!(U, p, t_imp)
+            # NOTE(review): the wrapper is passed for both trailing arguments,
+            # as in the original code — confirm against solve_newton!.
+            solve_newton!(
+                newtons_method,
+                newtons_method_cache,
+                U,
+                set_residual!,
+                set_jacobian!,
+                call_post_implicit!,
+                call_post_implicit!,
+            )
+        else
+            U = U_before_solve # There is no solve on this stage.
+        end
+
+        if !isnothing(T_lim) || !isnothing(T_exp)
+            if !isnothing(T_exp_T_lim!)
+                T_exp_T_lim!(T_exp, T_lim, U, p, t_exp)
+                dss!(T_lim, p, t_exp)
+                dss!(T_exp, p, t_exp)
+            end
+            if !isnothing(T_lim!)
+                T_lim!(T_lim, U, p, t_exp)
+                dss!(T_lim, p, t_exp)
+            end
+            if !isnothing(T_exp!)
+                T_exp!(T_exp, U, p, t_exp)
+                dss!(T_exp, p, t_exp)
+            end
+            # TODO: Can we just use T_exp_T_lim!, and not 3 different functions?
+            # TODO: Fuse the DSS calls above into a single operation.
+        end
+
+        if !isnothing(ΔU_imp)
+            # U aliases ΔU_imp here; the elementwise broadcast overwrites it.
+            @. ΔU_imp = U - u_plus_Δu_lim - Δt * Δu_over_Δt_from_prev_T_exp
+        elseif !isnothing(T_imp)
+            T_imp!(T_imp, U, p, t_imp)
+        end
+    end
+
+    # Assemble the final state from the coefficients stored at index s + 1.
+    t_final = t + Δt
+    prev_ΔtT_lim_to_Δu_coefs_final = prev_ΔtT_lim_to_Δu_coefs_by_stage_dense[s + 1]
+    prev_ΔtT_exp_to_Δu_coefs_final = prev_ΔtT_exp_to_Δu_coefs_by_stage_dense[s + 1]
+    prev_z_ΔtT_imp_to_Δu_coefs_final = prev_z_ΔtT_imp_to_Δu_coefs_by_stage_dense[s + 1]
+    prev_nz_ΔU_imp_to_Δu_coefs_final = prev_nz_ΔU_imp_to_Δu_coefs_by_stage_dense[s + 1]
+    Δu_over_Δt_from_prev_T_lim_final = broadcasted_dot(prev_ΔtT_lim_to_Δu_coefs_final, T_lim_by_stage)
+    Δu_over_Δt_from_prev_T_exp_final = broadcasted_dot(prev_ΔtT_exp_to_Δu_coefs_final, T_exp_by_stage)
+    Δu_over_Δt_from_prev_T_imp_final = broadcasted_dot(prev_z_ΔtT_imp_to_Δu_coefs_final, T_imp_by_stage)
+    Δu_from_prev_ΔU_imp_final = broadcasted_dot(prev_nz_ΔU_imp_to_Δu_coefs_final, ΔU_imp_by_stage)
+
+    # This name differs from the one used inside the stage closure so that the
+    # closure does not capture (and box) a variable of the enclosing function.
+    if isempty(prev_ΔtT_lim_to_Δu_coefs_final)
+        u_plus_Δu_lim_final = u
+    else
+        u_plus_Δu_lim_final = temp_value1
+        @. u_plus_Δu_lim_final = u + Δt * Δu_over_Δt_from_prev_T_lim_final
+        lim!(u_plus_Δu_lim_final, p, t_final, u)
+    end
+
+    @. u =
+        u_plus_Δu_lim_final +
+        Δt * (Δu_over_Δt_from_prev_T_exp_final + Δu_over_Δt_from_prev_T_imp_final) +
+        Δu_from_prev_ΔU_imp_final
+    dss!(u, p, t_final)
+    post_explicit!(u, p, t_final)
+
+    return u
+end
diff --git a/src/solvers/imex_tableaus.jl b/src/solvers/imex_tableaus.jl
@@ -19,15 +19,14 @@ default values for `c_exp` and `c_imp` assume that it is internally consistent.
 The explicit tableau must be strictly lower triangular, and the implicit tableau
 must be lower triangular (only DIRK algorithms are currently supported).
 """
-struct IMEXTableau{AE <: SPCO, BE <: SPCO, CE <: SPCO, AI <: SPCO, BI <: SPCO, CI <: SPCO}
-    a_exp::AE # matrix of size s×s
-    b_exp::BE # vector of length s
-    c_exp::CE # vector of length s
-    a_imp::AI # matrix of size s×s
-    b_imp::BI # vector of length s
-    c_imp::CI # vector of length s
-end
-IMEXTableau(args...) = IMEXTableau(map(x -> SparseCoeffs(x), args)...)
+struct IMEXTableau{Mat, Vec}
+    # Coefficients of the explicit part.
+    a_exp::Mat # s×s matrix
+    b_exp::Vec # length-s vector
+    c_exp::Vec # length-s vector
+    # Coefficients of the implicit part; these share the types of the explicit ones.
+    a_imp::Mat # s×s matrix
+    b_imp::Vec # length-s vector
+    c_imp::Vec # length-s vector
+end
 
 function IMEXTableau(;
     a_exp,
diff --git a/src/utilities/sparse_tuple.jl b/src/utilities/sparse_tuple.jl