diff --git a/Project.toml b/Project.toml index 4a369fbba..9447b49a6 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "ModelPredictiveControl" uuid = "61f9bdb8-6ae4-484a-811f-bbf86720c31c" -version = "1.12.0" +version = "1.13.0" authors = ["Francis Gagnon"] [deps] diff --git a/benchmark/2_bench_state_estim.jl b/benchmark/2_bench_state_estim.jl index 25dc87042..d49005861 100644 --- a/benchmark/2_bench_state_estim.jl +++ b/benchmark/2_bench_state_estim.jl @@ -305,6 +305,13 @@ mhe_pendulum_ipopt_curr = setconstraint!(mhe_pendulum_ipopt_curr; v̂min, v̂max JuMP.unset_time_limit_sec(mhe_pendulum_ipopt_curr.optim) JuMP.set_attribute(mhe_pendulum_ipopt_curr.optim, "tol", 1e-7) +hessian = true +mhe_pendulum_ipopt_currh = MovingHorizonEstimator( + model; He, σQ, σR, nint_u, σQint_u, optim, direct, hessian +) +mhe_pendulum_ipopt_currh = setconstraint!(mhe_pendulum_ipopt_currh; v̂min, v̂max) +JuMP.unset_time_limit_sec(mhe_pendulum_ipopt_currh.optim) + optim = JuMP.Model(optimizer_with_attributes(Ipopt.Optimizer,"sb"=>"yes"), add_bridges=false) direct = false mhe_pendulum_ipopt_pred = MovingHorizonEstimator( @@ -314,6 +321,14 @@ mhe_pendulum_ipopt_pred = setconstraint!(mhe_pendulum_ipopt_pred; v̂min, v̂max JuMP.unset_time_limit_sec(mhe_pendulum_ipopt_pred.optim) JuMP.set_attribute(mhe_pendulum_ipopt_pred.optim, "tol", 1e-7) +hessian = true +mhe_pendulum_ipopt_predh = MovingHorizonEstimator( + model; He, σQ, σR, nint_u, σQint_u, optim, direct, hessian +) +mhe_pendulum_ipopt_predh = setconstraint!(mhe_pendulum_ipopt_predh; v̂min, v̂max) +JuMP.unset_time_limit_sec(mhe_pendulum_ipopt_predh.optim) +JuMP.set_attribute(mhe_pendulum_ipopt_predh.optim, "tol", 1e-7) + optim = JuMP.Model(MadNLP.Optimizer, add_bridges=false) direct = true mhe_pendulum_madnlp_curr = MovingHorizonEstimator( @@ -338,11 +353,21 @@ CASE_ESTIM["Pendulum"]["MovingHorizonEstimator"]["Ipopt"]["Current form"] = sim!($mhe_pendulum_ipopt_curr, $N, $u; plant=$plant, x_0=$x_0, x̂_0=$x̂_0, progress=false), samples=samples, evals=evals, seconds=seconds ) +CASE_ESTIM["Pendulum"]["MovingHorizonEstimator"]["Ipopt"]["Current form (Hessian)"] = + @benchmarkable( + sim!($mhe_pendulum_ipopt_currh, $N, $u; plant=$plant, x_0=$x_0, x̂_0=$x̂_0, progress=false), + samples=samples, evals=evals, seconds=seconds + ) CASE_ESTIM["Pendulum"]["MovingHorizonEstimator"]["Ipopt"]["Prediction form"] = @benchmarkable( sim!($mhe_pendulum_ipopt_pred, $N, $u; plant=$plant, x_0=$x_0, x̂_0=$x̂_0, progress=false), samples=samples, evals=evals, seconds=seconds ) +CASE_ESTIM["Pendulum"]["MovingHorizonEstimator"]["Ipopt"]["Prediction form (Hessian)"] = + @benchmarkable( + sim!($mhe_pendulum_ipopt_predh, $N, $u; plant=$plant, x_0=$x_0, x̂_0=$x̂_0, progress=false), + samples=samples, evals=evals, seconds=seconds + ) CASE_ESTIM["Pendulum"]["MovingHorizonEstimator"]["MadNLP"]["Current form"] = @benchmarkable( sim!($mhe_pendulum_madnlp_curr, $N, $u; plant=$plant, x_0=$x_0, x̂_0=$x̂_0, progress=false), diff --git a/benchmark/3_bench_predictive_control.jl b/benchmark/3_bench_predictive_control.jl index 427f2e7e6..59b99ffdf 100644 --- a/benchmark/3_bench_predictive_control.jl +++ b/benchmark/3_bench_predictive_control.jl @@ -318,7 +318,13 @@ optim = JuMP.Model(MadNLP.Optimizer, add_bridges=false) transcription = SingleShooting() nmpc_madnlp_ss = NonLinMPC(estim; Hp, Hc, Mwt, Nwt, Cwt, optim, transcription) nmpc_madnlp_ss = setconstraint!(nmpc_madnlp_ss; umin, umax) -JuMP.unset_time_limit_sec(nmpc_madnlp_ss.optim) +JuMP.unset_time_limit_sec(nmpc_madnlp_ss.optim) + 
+optim = JuMP.Model(()->UnoSolver.Optimizer(preset="filtersqp"), add_bridges=false) +transcription, hessian, oracle = MultipleShooting(), true, true +nmpc_uno_ms_hess = NonLinMPC(estim; Hp, Hc, Mwt, Nwt, Cwt, optim, transcription, hessian, oracle) +nmpc_uno_ms_hess = setconstraint!(nmpc_uno_ms_hess; umin, umax) +JuMP.unset_time_limit_sec(nmpc_uno_ms_hess.optim) # TODO: does not work well with MadNLP and MultipleShooting or TrapezoidalCollocation, # figure out why. Current theory: @@ -375,6 +381,11 @@ CASE_MPC["Pendulum"]["NonLinMPC"]["Noneconomic"]["MadNLP"]["SingleShooting"] = sim!($nmpc_madnlp_ss, $N, $ry; plant=$plant, x_0=$x_0, x̂_0=$x̂_0, progress=false), samples=samples, evals=evals, seconds=seconds ) +CASE_MPC["Pendulum"]["NonLinMPC"]["Noneconomic"]["Uno"]["MultipleShooting (Hessian)"] = + @benchmarkable( + sim!($nmpc_uno_ms_hess, $N, $ry; plant=$plant, x_0=$x_0, x̂_0=$x̂_0, progress=false), + samples=samples, evals=evals, seconds=seconds + ) # ----------------- Case study: Pendulum economic -------------------------------- model2, p = pendulum_model2, pendulum_p2 diff --git a/benchmark/Project.toml b/benchmark/Project.toml index 282e1fcac..0aa81c0ee 100644 --- a/benchmark/Project.toml +++ b/benchmark/Project.toml @@ -13,3 +13,4 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" SparseConnectivityTracer = "9f842d2f-2579-4b1d-911e-f412cf18a3f5" SparseMatrixColorings = "0a514795-09f3-496d-8182-132a7b665d35" +UnoSolver = "1baa60ac-02f7-4b39-a7a8-2f4f58486b05" diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index 4a83ede08..914ff804a 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -1,6 +1,6 @@ using BenchmarkTools using ModelPredictiveControl, ControlSystemsBase, LinearAlgebra -using JuMP, OSQP, DAQP, Ipopt, MadNLP +using JuMP, OSQP, DAQP, Ipopt, MadNLP, UnoSolver const SUITE = BenchmarkGroup(["ModelPredictiveControl"]) diff --git a/docs/src/internals/state_estim.md b/docs/src/internals/state_estim.md index caba31010..6b92cc2d7 100644 --- a/docs/src/internals/state_estim.md +++ b/docs/src/internals/state_estim.md @@ -18,7 +18,8 @@ ModelPredictiveControl.relaxX̂ ModelPredictiveControl.relaxŴ ModelPredictiveControl.relaxV̂ ModelPredictiveControl.init_matconstraint_mhe -ModelPredictiveControl.get_nonlinops(::MovingHorizonEstimator, ::ModelPredictiveControl.GenericModel) +ModelPredictiveControl.get_nonlinobj_op(::MovingHorizonEstimator, ::ModelPredictiveControl.GenericModel) +ModelPredictiveControl.get_nonlincon_oracle(::MovingHorizonEstimator, ::ModelPredictiveControl.GenericModel) ``` ## Augmented Model diff --git a/src/controller/nonlinmpc.jl b/src/controller/nonlinmpc.jl index cabafde4c..987be9ecc 100644 --- a/src/controller/nonlinmpc.jl +++ b/src/controller/nonlinmpc.jl @@ -222,11 +222,11 @@ This controller allocates memory at each time step for the optimization. function, see [`DifferentiationInterface` doc](@extref DifferentiationInterface List). - `jacobian=default_jacobian(transcription)` : an `AbstractADType` backend for the Jacobian of the nonlinear constraints, see `gradient` above for the options (default in Extended Help). -- `hessian=false` : an `AbstractADType` backend for the Hessian of the Lagrangian, see - `gradient` above for the options. The default `false` skip it and use the quasi-Newton - method of `optim`, which is always the case if `oracle=false` (see Extended Help). 
-- `oracle=JuMP.solver_name(optim)=="Ipopt"`: use the efficient [`VectorNonlinearOracle`](@extref MathOptInterface MathOptInterface.VectorNonlinearOracle) - for the nonlinear constraints (not supported by most optimizers for now). +- `hessian=false` : an `AbstractADType` backend or `Bool` for the Hessian of the Lagrangian, + see `gradient` above for the options. The default `false` skip it and use the quasi-Newton + method of `optim`, which is always the case if `oracle=false` (see Extended Help). +- `oracle=JuMP.solver_name(optim)=="Ipopt"` : a `Bool` to use the [`VectorNonlinearOracle`](@extref MathOptInterface MathOptInterface.VectorNonlinearOracle) + for efficient nonlinear constraints (not supported by most optimizers for now). - additional keyword arguments are passed to [`UnscentedKalmanFilter`](@ref) constructor (or [`SteadyKalmanFilter`](@ref), for [`LinModel`](@ref)). @@ -409,7 +409,7 @@ function NonLinMPC( validate_JE(NT, JE) gc! = get_mutating_gc(NT, gc) weights = ControllerWeights(estim.model, Hp, Hc, M_Hp, N_Hc, L_Hp, Cwt, Ewt) - hessian = validate_hessian(hessian, gradient, oracle) + hessian = validate_hessian(hessian, gradient, oracle, DEFAULT_NONLINMPC_HESSIAN) return NonLinMPC{NT}( estim, Hp, Hc, nb, weights, JE, gc!, nc, p, transcription, optim, gradient, jacobian, hessian, oracle @@ -512,34 +512,6 @@ function test_custom_functions(NT, model::SimModel, JE, gc!, nc, Uop, Yop, Dop, return nothing end -""" - validate_hessian(hessian, gradient, oracle) -> backend - -Validate `hessian` argument and return the differentiation backend. -""" -function validate_hessian(hessian, gradient, oracle) - if hessian == true - backend = DEFAULT_NONLINMPC_HESSIAN - elseif hessian == false || isnothing(hessian) - backend = nothing - else - backend = hessian - end - if oracle == false && !isnothing(backend) - error("Second order derivatives are only supported with oracle=true.") - end - if oracle == true && !isnothing(backend) - hess = dense_backend(backend) - grad = dense_backend(gradient) - if hess != grad - @info "The objective function gradient will be computed with the hessian "* - "backend ($(backend_str(hess)))\n instead of the one in gradient "* - "argument ($(backend_str(grad))) for efficiency." - end - end - return backend -end - """ addinfo!(info, mpc::NonLinMPC) -> info @@ -626,6 +598,134 @@ function reset_nonlincon!(mpc::NonLinMPC) end end +""" + get_nonlinobj_op(mpc::NonLinMPC, optim::JuMP.GenericModel{JNT}) -> J_op + +Return the nonlinear operator for the objective of `mpc` [`NonLinMPC`](@ref). + +It is based on the splatting syntax. This method is really intricate and that's because of: + +- These functions are used inside the nonlinear optimization, so they must be type-stable + and as efficient as possible. All the function outputs and derivatives are cached and + updated in-place if required to use the efficient [`value_and_gradient!`](@extref DifferentiationInterface DifferentiationInterface.value_and_jacobian!). +- The splatting syntax for objective functions implies the use of `Vararg{T,N}` (see the [performance tip](@extref Julia Be-aware-of-when-Julia-avoids-specializing)) + and memoization to avoid redundant computations. This is already complex, but it's even + worse knowing that the automatic differentiation tools do not support splatting. +- The signature of gradient and hessian functions is not the same for univariate (`nZ̃ == 1`) + and multivariate (`nZ̃ > 1`) operators in `JuMP`. Both must be defined. 
+""" +function get_nonlinobj_op(mpc::NonLinMPC, optim::JuMP.GenericModel{JNT}) where JNT<:Real + model = mpc.estim.model + transcription = mpc.transcription + grad, hess = mpc.gradient, mpc.hessian + nu, ny, nx̂, nϵ = model.nu, model.ny, mpc.estim.nx̂, mpc.nϵ + nk = get_nk(model, transcription) + Hp, Hc = mpc.Hp, mpc.Hc + ng = length(mpc.con.i_g) + nc, neq = mpc.con.nc, mpc.con.neq + nZ̃, nU, nŶ, nX̂, nK = length(mpc.Z̃), Hp*nu, Hp*ny, Hp*nx̂, Hp*nk + nΔŨ, nUe, nŶe = nu*Hc + nϵ, nU + nu, nŶ + ny + strict = Val(true) + myNaN = convert(JNT, NaN) + J::Vector{JNT} = zeros(JNT, 1) + ΔŨ::Vector{JNT} = zeros(JNT, nΔŨ) + x̂0end::Vector{JNT} = zeros(JNT, nx̂) + K0::Vector{JNT} = zeros(JNT, nK) + Ue::Vector{JNT}, Ŷe::Vector{JNT} = zeros(JNT, nUe), zeros(JNT, nŶe) + U0::Vector{JNT}, Ŷ0::Vector{JNT} = zeros(JNT, nU), zeros(JNT, nŶ) + Û0::Vector{JNT}, X̂0::Vector{JNT} = zeros(JNT, nU), zeros(JNT, nX̂) + gc::Vector{JNT}, g::Vector{JNT} = zeros(JNT, nc), zeros(JNT, ng) + geq::Vector{JNT} = zeros(JNT, neq) + function J!(Z̃, ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, g, geq) + update_predictions!(ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, g, geq, mpc, Z̃) + return obj_nonlinprog!(Ŷ0, U0, mpc, model, Ue, Ŷe, ΔŨ) + end + Z̃_J = fill(myNaN, nZ̃) # NaN to force update at first call + J_cache = ( + Cache(ΔŨ), Cache(x̂0end), Cache(Ue), Cache(Ŷe), Cache(U0), Cache(Ŷ0), + Cache(Û0), Cache(K0), Cache(X̂0), + Cache(gc), Cache(g), Cache(geq), + ) + ∇J_prep = prepare_gradient(J!, grad, Z̃_J, J_cache...; strict) + ∇J = Vector{JNT}(undef, nZ̃) + if !isnothing(hess) + ∇²J_prep = prepare_hessian(J!, hess, Z̃_J, J_cache...; strict) + ∇²J = init_diffmat(JNT, hess, ∇²J_prep, nZ̃, nZ̃) + end + update_objective! = if !isnothing(hess) + function (J, ∇J, ∇²J, Z̃_J, Z̃_arg) + if isdifferent(Z̃_arg, Z̃_J) + Z̃_J .= Z̃_arg + J[], _ = value_gradient_and_hessian!( + J!, ∇J, ∇²J, ∇²J_prep, hess, Z̃_J, J_cache... + ) + end + end + else + update_objective! = function (J, ∇J, Z̃_∇J, Z̃_arg) + if isdifferent(Z̃_arg, Z̃_∇J) + Z̃_∇J .= Z̃_arg + J[], _ = value_and_gradient!( + J!, ∇J, ∇J_prep, grad, Z̃_∇J, J_cache... + ) + end + end + end + J_func = if !isnothing(hess) + function (Z̃_arg::Vararg{T, N}) where {N, T<:Real} + update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) + return J[]::T + end + else + function (Z̃_arg::Vararg{T, N}) where {N, T<:Real} + update_objective!(J, ∇J, Z̃_J, Z̃_arg) + return J[]::T + end + end + ∇J_func! = if nZ̃ == 1 # univariate syntax (see JuMP.@operator doc): + if !isnothing(hess) + function (Z̃_arg) + update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) + return ∇J[] + end + else + function (Z̃_arg) + update_objective!(J, ∇J, Z̃_J, Z̃_arg) + return ∇J[] + end + end + else # multivariate syntax (see JuMP.@operator doc): + if !isnothing(hess) + function (∇J_arg::AbstractVector{T}, Z̃_arg::Vararg{T, N}) where {N, T<:Real} + update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) + return ∇J_arg .= ∇J + end + else + function (∇J_arg::AbstractVector{T}, Z̃_arg::Vararg{T, N}) where {N, T<:Real} + update_objective!(J, ∇J, Z̃_J, Z̃_arg) + return ∇J_arg .= ∇J + end + end + end + ∇²J_func! 
= if nZ̃ == 1 # univariate syntax (see JuMP.@operator doc): + function (Z̃_arg) + update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) + return ∇²J[] + end + else # multivariate syntax (see JuMP.@operator doc): + function (∇²J_arg::AbstractMatrix{T}, Z̃_arg::Vararg{T, N}) where {N, T<:Real} + update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) + return fill_lowertriangle!(∇²J_arg, ∇²J) + end + end + if !isnothing(hess) + @operator(optim, J_op, nZ̃, J_func, ∇J_func!, ∇²J_func!) + else + @operator(optim, J_op, nZ̃, J_func, ∇J_func!) + end + return J_op +end + """ get_nonlincon_oracle(mpc::NonLinMPC, optim) -> g_oracle, geq_oracle @@ -661,7 +761,7 @@ function get_nonlincon_oracle(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JN Û0::Vector{JNT}, X̂0::Vector{JNT} = zeros(JNT, nU), zeros(JNT, nX̂) gc::Vector{JNT}, g::Vector{JNT} = zeros(JNT, nc), zeros(JNT, ng) gi::Vector{JNT}, geq::Vector{JNT} = zeros(JNT, ngi), zeros(JNT, neq) - λi::Vector{JNT}, λeq::Vector{JNT} = ones(JNT, ngi), ones(JNT, neq) + λi::Vector{JNT}, λeq::Vector{JNT} = ones(JNT, ngi), ones(JNT, neq) # -------------- inequality constraint: nonlinear oracle ----------------------------- function gi!(gi, Z̃, ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, geq, g) update_predictions!(ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, g, geq, mpc, Z̃) @@ -674,22 +774,22 @@ function get_nonlincon_oracle(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JN return dot(λi, gi) end Z̃_∇gi = fill(myNaN, nZ̃) # NaN to force update at first call - ∇gi_context = ( + ∇gi_cache = ( Cache(ΔŨ), Cache(x̂0end), Cache(Ue), Cache(Ŷe), Cache(U0), Cache(Ŷ0), Cache(Û0), Cache(K0), Cache(X̂0), Cache(gc), Cache(geq), Cache(g) ) - ∇gi_prep = prepare_jacobian(gi!, gi, jac, Z̃_∇gi, ∇gi_context...; strict) + ∇gi_prep = prepare_jacobian(gi!, gi, jac, Z̃_∇gi, ∇gi_cache...; strict) ∇gi = init_diffmat(JNT, jac, ∇gi_prep, nZ̃, ngi) ∇gi_structure = init_diffstructure(∇gi) if !isnothing(hess) - ∇²gi_context = ( + ∇²gi_cache = ( Cache(ΔŨ), Cache(x̂0end), Cache(Ue), Cache(Ŷe), Cache(U0), Cache(Ŷ0), Cache(Û0), Cache(K0), Cache(X̂0), Cache(gc), Cache(geq), Cache(g), Cache(gi) ) ∇²gi_prep = prepare_hessian( - ℓ_gi, hess, Z̃_∇gi, Constant(λi), ∇²gi_context...; strict + ℓ_gi, hess, Z̃_∇gi, Constant(λi), ∇²gi_cache...; strict ) ∇²ℓ_gi = init_diffmat(JNT, hess, ∇²gi_prep, nZ̃, nZ̃) ∇²gi_structure = lowertriangle_indices(init_diffstructure(∇²ℓ_gi)) @@ -697,7 +797,7 @@ function get_nonlincon_oracle(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JN function update_con!(gi, ∇gi, Z̃_∇gi, Z̃_arg) if isdifferent(Z̃_arg, Z̃_∇gi) Z̃_∇gi .= Z̃_arg - value_and_jacobian!(gi!, gi, ∇gi, ∇gi_prep, jac, Z̃_∇gi, ∇gi_context...) + value_and_jacobian!(gi!, gi, ∇gi, ∇gi_prep, jac, Z̃_∇gi, ∇gi_cache...) end return nothing end @@ -712,7 +812,7 @@ function get_nonlincon_oracle(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JN function ∇²gi_func!(∇²ℓ_arg, Z̃_arg, λ_arg) Z̃_∇gi .= Z̃_arg λi .= λ_arg - hessian!(ℓ_gi, ∇²ℓ_gi, ∇²gi_prep, hess, Z̃_∇gi, Constant(λi), ∇²gi_context...) + hessian!(ℓ_gi, ∇²ℓ_gi, ∇²gi_prep, hess, Z̃_∇gi, Constant(λi), ∇²gi_cache...) return diffmat2vec!(∇²ℓ_arg, ∇²ℓ_gi, ∇²gi_structure) end gi_min = fill(-myInf, ngi) @@ -725,7 +825,7 @@ function get_nonlincon_oracle(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JN jacobian_structure = ∇gi_structure, eval_jacobian = ∇gi_func!, hessian_lagrangian_structure = isnothing(hess) ? Tuple{Int,Int}[] : ∇²gi_structure, - eval_hessian_lagrangian = isnothing(hess) ? nothing : ∇²gi_func! + eval_hessian_lagrangian = isnothing(hess) ? nothing : ∇²gi_func! 
) # ------------- equality constraints : nonlinear oracle ------------------------------ function geq!(geq, Z̃, ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, g) @@ -737,22 +837,22 @@ function get_nonlincon_oracle(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JN return dot(λeq, geq) end Z̃_∇geq = fill(myNaN, nZ̃) # NaN to force update at first call - ∇geq_context = ( + ∇geq_cache = ( Cache(ΔŨ), Cache(x̂0end), Cache(Ue), Cache(Ŷe), Cache(U0), Cache(Ŷ0), Cache(Û0), Cache(K0), Cache(X̂0), Cache(gc), Cache(g) ) - ∇geq_prep = prepare_jacobian(geq!, geq, jac, Z̃_∇geq, ∇geq_context...; strict) + ∇geq_prep = prepare_jacobian(geq!, geq, jac, Z̃_∇geq, ∇geq_cache...; strict) ∇geq = init_diffmat(JNT, jac, ∇geq_prep, nZ̃, neq) ∇geq_structure = init_diffstructure(∇geq) if !isnothing(hess) - ∇²geq_context = ( + ∇²geq_cache = ( Cache(ΔŨ), Cache(x̂0end), Cache(Ue), Cache(Ŷe), Cache(U0), Cache(Ŷ0), Cache(Û0), Cache(K0), Cache(X̂0), Cache(gc), Cache(geq), Cache(g) ) ∇²geq_prep = prepare_hessian( - ℓ_geq, hess, Z̃_∇geq, Constant(λeq), ∇²geq_context...; strict + ℓ_geq, hess, Z̃_∇geq, Constant(λeq), ∇²geq_cache...; strict ) ∇²ℓ_geq = init_diffmat(JNT, hess, ∇²geq_prep, nZ̃, nZ̃) ∇²geq_structure = lowertriangle_indices(init_diffstructure(∇²ℓ_geq)) @@ -760,7 +860,7 @@ function get_nonlincon_oracle(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JN function update_con_eq!(geq, ∇geq, Z̃_∇geq, Z̃_arg) if isdifferent(Z̃_arg, Z̃_∇geq) Z̃_∇geq .= Z̃_arg - value_and_jacobian!(geq!, geq, ∇geq, ∇geq_prep, jac, Z̃_∇geq, ∇geq_context...) + value_and_jacobian!(geq!, geq, ∇geq, ∇geq_prep, jac, Z̃_∇geq, ∇geq_cache...) end return nothing end @@ -775,7 +875,7 @@ function get_nonlincon_oracle(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JN function ∇²geq_func!(∇²ℓ_arg, Z̃_arg, λ_arg) Z̃_∇geq .= Z̃_arg λeq .= λ_arg - hessian!(ℓ_geq, ∇²ℓ_geq, ∇²geq_prep, hess, Z̃_∇geq, Constant(λeq), ∇²geq_context...) + hessian!(ℓ_geq, ∇²ℓ_geq, ∇²geq_prep, hess, Z̃_∇geq, Constant(λeq), ∇²geq_cache...) return diffmat2vec!(∇²ℓ_arg, ∇²ℓ_geq, ∇²geq_structure) end geq_min = geq_max = zeros(JNT, neq) @@ -792,130 +892,6 @@ function get_nonlincon_oracle(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JN return g_oracle, geq_oracle end -""" - get_nonlinobj_op(mpc::NonLinMPC, optim::JuMP.GenericModel{JNT}) -> J_op - -Return the nonlinear operator for the objective function of `mpc` [`NonLinMPC`](@ref). - -It is based on the splatting syntax. This method is really intricate and that's because of: - -- These functions are used inside the nonlinear optimization, so they must be type-stable - and as efficient as possible. All the function outputs and derivatives are cached and - updated in-place if required to use the efficient [`value_and_gradient!`](@extref DifferentiationInterface DifferentiationInterface.value_and_jacobian!). -- The splatting syntax for objective functions implies the use of `Vararg{T,N}` (see the [performance tip](@extref Julia Be-aware-of-when-Julia-avoids-specializing)) - and memoization to avoid redundant computations. This is already complex, but it's even - worse knowing that the automatic differentiation tools do not support splatting. -- The signature of gradient and hessian functions is not the same for univariate (`nZ̃ == 1`) - and multivariate (`nZ̃ > 1`) operators in `JuMP`. Both must be defined. 
-""" -function get_nonlinobj_op(mpc::NonLinMPC, optim::JuMP.GenericModel{JNT}) where JNT<:Real - model = mpc.estim.model - transcription = mpc.transcription - grad, hess = mpc.gradient, mpc.hessian - nu, ny, nx̂, nϵ = model.nu, model.ny, mpc.estim.nx̂, mpc.nϵ - nk = get_nk(model, transcription) - Hp, Hc = mpc.Hp, mpc.Hc - ng = length(mpc.con.i_g) - nc, neq = mpc.con.nc, mpc.con.neq - nZ̃, nU, nŶ, nX̂, nK = length(mpc.Z̃), Hp*nu, Hp*ny, Hp*nx̂, Hp*nk - nΔŨ, nUe, nŶe = nu*Hc + nϵ, nU + nu, nŶ + ny - strict = Val(true) - myNaN = convert(JNT, NaN) - J::Vector{JNT} = zeros(JNT, 1) - ΔŨ::Vector{JNT} = zeros(JNT, nΔŨ) - x̂0end::Vector{JNT} = zeros(JNT, nx̂) - K0::Vector{JNT} = zeros(JNT, nK) - Ue::Vector{JNT}, Ŷe::Vector{JNT} = zeros(JNT, nUe), zeros(JNT, nŶe) - U0::Vector{JNT}, Ŷ0::Vector{JNT} = zeros(JNT, nU), zeros(JNT, nŶ) - Û0::Vector{JNT}, X̂0::Vector{JNT} = zeros(JNT, nU), zeros(JNT, nX̂) - gc::Vector{JNT}, g::Vector{JNT} = zeros(JNT, nc), zeros(JNT, ng) - geq::Vector{JNT} = zeros(JNT, neq) - function J!(Z̃, ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, g, geq) - update_predictions!(ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, g, geq, mpc, Z̃) - return obj_nonlinprog!(Ŷ0, U0, mpc, model, Ue, Ŷe, ΔŨ) - end - Z̃_J = fill(myNaN, nZ̃) # NaN to force update at first call - J_context = ( - Cache(ΔŨ), Cache(x̂0end), Cache(Ue), Cache(Ŷe), Cache(U0), Cache(Ŷ0), - Cache(Û0), Cache(K0), Cache(X̂0), - Cache(gc), Cache(g), Cache(geq), - ) - ∇J_prep = prepare_gradient(J!, grad, Z̃_J, J_context...; strict) - ∇J = Vector{JNT}(undef, nZ̃) - if !isnothing(hess) - ∇²J_prep = prepare_hessian(J!, hess, Z̃_J, J_context...; strict) - ∇²J = init_diffmat(JNT, hess, ∇²J_prep, nZ̃, nZ̃) - end - update_objective! = if !isnothing(hess) - function (J, ∇J, ∇²J, Z̃_J, Z̃_arg) - if isdifferent(Z̃_arg, Z̃_J) - Z̃_J .= Z̃_arg - J[], _ = value_gradient_and_hessian!(J!, ∇J, ∇²J, hess, Z̃_J, J_context...) - end - end - else - update_objective! = function (J, ∇J, Z̃_∇J, Z̃_arg) - if isdifferent(Z̃_arg, Z̃_∇J) - Z̃_∇J .= Z̃_arg - J[], _ = value_and_gradient!(J!, ∇J, ∇J_prep, grad, Z̃_∇J, J_context...) - end - end - end - J_func = if !isnothing(hess) - function (Z̃_arg::Vararg{T, N}) where {N, T<:Real} - update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) - return J[]::T - end - else - function (Z̃_arg::Vararg{T, N}) where {N, T<:Real} - update_objective!(J, ∇J, Z̃_J, Z̃_arg) - return J[]::T - end - end - ∇J_func! = if nZ̃ == 1 # univariate syntax (see JuMP.@operator doc): - if !isnothing(hess) - function (Z̃_arg) - update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) - return ∇J[] - end - else - function (Z̃_arg) - update_objective!(J, ∇J, Z̃_J, Z̃_arg) - return ∇J[] - end - end - else # multivariate syntax (see JuMP.@operator doc): - if !isnothing(hess) - function (∇J_arg::AbstractVector{T}, Z̃_arg::Vararg{T, N}) where {N, T<:Real} - update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) - return ∇J_arg .= ∇J - end - else - function (∇J_arg::AbstractVector{T}, Z̃_arg::Vararg{T, N}) where {N, T<:Real} - update_objective!(J, ∇J, Z̃_J, Z̃_arg) - return ∇J_arg .= ∇J - end - end - end - ∇²J_func! = if nZ̃ == 1 # univariate syntax (see JuMP.@operator doc): - function (Z̃_arg) - update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) - return ∇²J[] - end - else # multivariate syntax (see JuMP.@operator doc): - function (∇²J_arg::AbstractMatrix{T}, Z̃_arg::Vararg{T, N}) where {N, T<:Real} - update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) - return fill_lowertriangle!(∇²J_arg, ∇²J) - end - end - if !isnothing(hess) - @operator(optim, J_op, nZ̃, J_func, ∇J_func!, ∇²J_func!) 
- else - @operator(optim, J_op, nZ̃, J_func, ∇J_func!) - end - return J_op -end - """ update_predictions!( ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, g, geq, diff --git a/src/estimator/mhe.jl b/src/estimator/mhe.jl index 008a490e4..f466569b4 100644 --- a/src/estimator/mhe.jl +++ b/src/estimator/mhe.jl @@ -13,6 +13,7 @@ end function print_backends(io::IO, estim::MovingHorizonEstimator, ::SimModel) println(io, "├ gradient: $(backend_str(estim.gradient))") println(io, "├ jacobian: $(backend_str(estim.jacobian))") + println(io, "├ hessian: $(backend_str(estim.hessian))") end "No differentiation backends to print for `LinModel`." print_backends(::IO, ::MovingHorizonEstimator, ::LinModel) = nothing diff --git a/src/estimator/mhe/construct.jl b/src/estimator/mhe/construct.jl index 40474f426..ec92fc199 100644 --- a/src/estimator/mhe/construct.jl +++ b/src/estimator/mhe/construct.jl @@ -2,6 +2,11 @@ const DEFAULT_LINMHE_OPTIMIZER = OSQP.MathOptInterfaceOSQP.Optimizer const DEFAULT_NONLINMHE_OPTIMIZER = optimizer_with_attributes(Ipopt.Optimizer,"sb"=>"yes") const DEFAULT_NONLINMHE_GRADIENT = AutoForwardDiff() const DEFAULT_NONLINMHE_JACOBIAN = AutoForwardDiff() +const DEFAULT_NONLINMHE_HESSIAN = AutoSparse( + AutoForwardDiff(); + sparsity_detector=TracerSparsityDetector(), + coloring_algorithm=GreedyColoringAlgorithm(), +) @doc raw""" Include all the data for the constraints of [`MovingHorizonEstimator`](@ref). @@ -51,6 +56,7 @@ struct MovingHorizonEstimator{ JM<:JuMP.GenericModel, GB<:AbstractADType, JB<:AbstractADType, + HB<:Union{AbstractADType, Nothing}, CE<:KalmanEstimator, } <: StateEstimator{NT} model::SM @@ -61,6 +67,7 @@ struct MovingHorizonEstimator{ con::EstimatorConstraint{NT} gradient::GB jacobian::JB + hessian::HB oracle::Bool covestim::CE Z̃::Vector{NT} @@ -113,7 +120,7 @@ struct MovingHorizonEstimator{ function MovingHorizonEstimator{NT}( model::SM, He, i_ym, nint_u, nint_ym, cov::KC, Cwt, - optim::JM, gradient::GB, jacobian::JB, oracle, covestim::CE; + optim::JM, gradient::GB, jacobian::JB, hessian::HB, oracle, covestim::CE; direct=true ) where { NT<:Real, @@ -122,6 +129,7 @@ struct MovingHorizonEstimator{ JM<:JuMP.GenericModel, GB<:AbstractADType, JB<:AbstractADType, + HB<:Union{AbstractADType, Nothing}, CE<:KalmanEstimator{NT} } nu, ny, nd, nk = model.nu, model.ny, model.nd, model.nk @@ -158,11 +166,11 @@ struct MovingHorizonEstimator{ P̂arr_old = copy(cov.P̂_0) Nk = [0] corrected = [false] - estim = new{NT, SM, KC, JM, GB, JB, CE}( + estim = new{NT, SM, KC, JM, GB, JB, HB, CE}( model, cov, optim, con, - gradient, jacobian, oracle, + gradient, jacobian, hessian, oracle, covestim, Z̃, lastu0, x̂op, f̂op, x̂0, He, nε, @@ -268,8 +276,11 @@ transcription for now. function when `model` is not a [`LinModel`](@ref), see [`DifferentiationInterface` doc](@extref DifferentiationInterface List). - `jacobian=AutoForwardDiff()` : an `AbstractADType` backend for the Jacobian of the constraints when `model` is not a [`LinModel`](@ref), see `gradient` above for the options. -- `oracle=JuMP.solver_name(optim)=="Ipopt"`: use the efficient [`VectorNonlinearOracle`](@extref MathOptInterface MathOptInterface.VectorNonlinearOracle) - for the nonlinear constraints (not supported by most optimizers for now). +- `hessian=false` : an `AbstractADType` backend for the Hessian of the Lagrangian, see + `gradient` above for the options. The default `false` skip it and use the quasi-Newton + method of `optim`, which is always the case if `oracle=false` (see Extended Help). 
+- `oracle=JuMP.solver_name(optim)=="Ipopt"` : a `Bool` to use the [`VectorNonlinearOracle`](@extref MathOptInterface MathOptInterface.VectorNonlinearOracle) + for efficient nonlinear constraints (not supported by most optimizers for now). - `direct=true`: construct with a direct transmission from ``\mathbf{y^m}`` (a.k.a. current estimator, in opposition to the delayed/predictor form). @@ -280,10 +291,11 @@ julia> model = NonLinModel((x,u,_,_)->0.1x+u, (x,_,_)->2x, 10.0, 1, 1, 1, solver julia> estim = MovingHorizonEstimator(model, He=5, σR=[1], σP_0=[0.01]) MovingHorizonEstimator estimator with a sample time Ts = 10.0 s: ├ model: NonLinModel -├ optimizer: Ipopt +├ optimizer: Ipopt ├ gradient: AutoForwardDiff ├ jacobian: AutoForwardDiff -├ arrival covariance: UnscentedKalmanFilter +├ hessian: nothing +├ arrival covariance: UnscentedKalmanFilter └ dimensions: ├ 5 estimation steps He ├ 0 slack variable ε (estimation constraints) @@ -363,7 +375,16 @@ MovingHorizonEstimator estimator with a sample time Ts = 10.0 s: [`JuMP` documentation](@extref JuMP Common-mistakes-when-writing-a-user-defined-operator) for common mistakes when writing these functions. Also, an [`UnscentedKalmanFilter`](@ref) estimates the arrival covariance by default. - + + One exception about AD: the selected backend for the Hessian of the Lagrangian function + with `oracle=true` and `hessian=true` options is sparse: + ```julia + AutoSparse( + AutoForwardDiff(); + sparsity_detector = TracerSparsityDetector(), + coloring_algorithm = GreedyColoringAlgorithm() + ) + ``` The slack variable ``ε`` relaxes the constraints if enabled, see [`setconstraint!`](@ref). It is disabled by default for the MHE (from `Cwt=Inf`) but it should be activated for problems with two or more types of bounds, to ensure feasibility (e.g. on the estimated @@ -389,6 +410,7 @@ function MovingHorizonEstimator( optim::JM = default_optim_mhe(model), gradient::AbstractADType = DEFAULT_NONLINMHE_GRADIENT, jacobian::AbstractADType = DEFAULT_NONLINMHE_JACOBIAN, + hessian::Union{AbstractADType, Bool, Nothing} = false, oracle::Bool = JuMP.solver_name(optim)=="Ipopt", direct = true, σP_0 = sigmaP_0, @@ -406,7 +428,7 @@ function MovingHorizonEstimator( isnothing(He) && throw(ArgumentError("Estimation horizon He must be explicitly specified")) return MovingHorizonEstimator( model, He, i_ym, nint_u, nint_ym, P̂_0, Q̂, R̂, Cwt; - direct, optim, gradient, jacobian, oracle + direct, optim, gradient, jacobian, hessian, oracle ) end @@ -440,17 +462,19 @@ function MovingHorizonEstimator( optim::JM = default_optim_mhe(model), gradient::AbstractADType = DEFAULT_NONLINMHE_GRADIENT, jacobian::AbstractADType = DEFAULT_NONLINMHE_JACOBIAN, + hessian::Union{AbstractADType, Bool, Nothing} = false, oracle::Bool = JuMP.solver_name(optim)=="Ipopt", direct = true, covestim::CE = default_covestim_mhe(model, i_ym, nint_u, nint_ym, P̂_0, Q̂, R̂; direct) ) where {NT<:Real, SM<:SimModel{NT}, JM<:JuMP.GenericModel, CE<:StateEstimator{NT}} P̂_0, Q̂, R̂ = to_mat(P̂_0), to_mat(Q̂), to_mat(R̂) cov = KalmanCovariances(model, i_ym, nint_u, nint_ym, Q̂, R̂, P̂_0, He) + hessian = validate_hessian(hessian, gradient, oracle, DEFAULT_NONLINMHE_HESSIAN) validate_covestim(cov, covestim) return MovingHorizonEstimator{NT}( model, He, i_ym, nint_u, nint_ym, cov, Cwt, - optim, gradient, jacobian, oracle, covestim; + optim, gradient, jacobian, hessian, oracle, covestim; direct ) end @@ -735,7 +759,7 @@ Re-construct nonlinear constraints and add them to `estim.optim`. 
function reset_nonlincon!(estim::MovingHorizonEstimator, model::NonLinModel) optim = estim.optim if estim.oracle - g_oracle, = get_nonlinops(estim, optim) + g_oracle = get_nonlincon_oracle(estim, optim) set_nonlincon!(estim, model, optim, g_oracle) else set_nonlincon_leg!(estim, model, optim) @@ -1304,8 +1328,8 @@ function init_optimization!( end # constraints with vector nonlinear oracle, objective function with splatting: if estim.oracle - g_oracle, J_op = get_nonlinops(estim, optim) - optim[:J_op] = J_op + J_op = get_nonlinobj_op(estim, optim) + g_oracle = get_nonlincon_oracle(estim, optim) else J_func, ∇J_func!, g_funcs, ∇g_funcs! = get_optim_functions(estim, optim) @operator(optim, J_op, nZ̃, J_func, ∇J_func!) @@ -1321,72 +1345,193 @@ function init_optimization!( end """ - get_nonlinops(estim::MovingHorizonEstimator, optim) -> g_oracle, J_op + get_nonlinobj_op(estim::MovingHorizonEstimator, optim) -> J_op -Return the operators for the nonlinear optimization of [`MovingHorizonEstimator`](@ref). +Return the nonlinear operator for the objective of `estim` [`MovingHorizonEstimator`](@ref). -Return `g_oracle`, the [`VectorNonlinearOracle`](@extref MathOptInterface MathOptInterface.VectorNonlinearOracle) -for the ineqaulity constraints. Note that `g_oracle` only includes the non-`Inf` -inequality constraints, thus it must be re-constructed if they change. Also return `J_op`, -the [`NonlinearOperator`](@extref JuMP NonlinearOperator) for the objective function, based -on the splatting syntax. This method is really intricate and that's because of 2 elements: +It is based on the splatting syntax. This method is really intricate and that's because of: - These functions are used inside the nonlinear optimization, so they must be type-stable and as efficient as possible. All the function outputs and derivatives are cached and - updated in-place if required to use the efficient [`value_and_jacobian!`](@extref DifferentiationInterface DifferentiationInterface.value_and_jacobian!). + updated in-place if required to use the efficient [`value_and_gradient!`](@extref DifferentiationInterface DifferentiationInterface.value_and_jacobian!). - The splatting syntax for objective functions implies the use of `Vararg{T,N}` (see the [performance tip](@extref Julia Be-aware-of-when-Julia-avoids-specializing)) and memoization to avoid redundant computations. This is already complex, but it's even worse knowing that the automatic differentiation tools do not support splatting. 
""" -function get_nonlinops( +function get_nonlinobj_op( estim::MovingHorizonEstimator, optim::JuMP.GenericModel{JNT} ) where JNT<:Real - # ----------- common cache for Jfunc and gfuncs -------------------------------------- model, con = estim.model, estim.con - grad, jac = estim.gradient, estim.jacobian - nx̂, nym, nŷ, nu, nε, nk = estim.nx̂, estim.nym, model.ny, model.nu, estim.nε, model.nk + grad, hess = estim.gradient, estim.hessian + nx̂, nym, nŷ, nu, nk = estim.nx̂, estim.nym, model.ny, model.nu, model.nk + He = estim.He + ng = length(con.i_g) + nV̂, nX̂, ng, nZ̃ = He*nym, He*nx̂, length(con.i_g), length(estim.Z̃) + strict = Val(true) + myNaN = convert(JNT, NaN) + J::Vector{JNT} = zeros(JNT, 1) + V̂::Vector{JNT}, X̂0::Vector{JNT} = zeros(JNT, nV̂), zeros(JNT, nX̂) + k0::Vector{JNT} = zeros(JNT, nk) + û0::Vector{JNT}, ŷ0::Vector{JNT} = zeros(JNT, nu), zeros(JNT, nŷ) + g::Vector{JNT} = zeros(JNT, ng) + x̄::Vector{JNT} = zeros(JNT, nx̂) + function J!(Z̃, V̂, X̂0, û0, k0, ŷ0, g, x̄) + update_prediction!(V̂, X̂0, û0, k0, ŷ0, g, estim, Z̃) + return obj_nonlinprog!(x̄, estim, model, V̂, Z̃) + end + Z̃_J = fill(myNaN, nZ̃) # NaN to force update_predictions! at first call + J_cache = ( + Cache(V̂), Cache(X̂0), Cache(û0), Cache(k0), Cache(ŷ0), + Cache(g), + Cache(x̄), + ) + # temporarily "fill" the estimation window for the preparation of the gradient: + estim.Nk[] = He + ∇J_prep = prepare_gradient(J!, grad, Z̃_J, J_cache...; strict) + estim.Nk[] = 0 + ∇J = Vector{JNT}(undef, nZ̃) + if !isnothing(hess) + estim.Nk[] = He # see comment above + ∇²J_prep = prepare_hessian(J!, hess, Z̃_J, J_cache...; strict) + estim.Nk[] = 0 + ∇²J = init_diffmat(JNT, hess, ∇²J_prep, nZ̃, nZ̃) + end + update_objective! = if !isnothing(hess) + function (J, ∇J, ∇²J, Z̃_∇J, Z̃_arg) + if isdifferent(Z̃_arg, Z̃_∇J) + Z̃_∇J .= Z̃_arg + J[], _ = value_gradient_and_hessian!( + J!, ∇J, ∇²J, ∇²J_prep, hess, Z̃_J, J_cache... + ) + end + end + else + function (J, ∇J, Z̃_∇J, Z̃_arg) + if isdifferent(Z̃_arg, Z̃_∇J) + Z̃_∇J .= Z̃_arg + J[], _ = value_and_gradient!(J!, ∇J, ∇J_prep, grad, Z̃_J, J_cache...) + end + end + end + J_func = if !isnothing(hess) + function (Z̃_arg::Vararg{T, N}) where {N, T<:Real} + update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) + return J[]::T + end + else + function (Z̃_arg::Vararg{T, N}) where {N, T<:Real} + update_objective!(J, ∇J, Z̃_J, Z̃_arg) + return J[]::T + end + end + # only the multivariate syntax of JuMP.@operator, univariate is impossible for MHE + # since Z̃ comprises the arrival state estimate AND the estimated process noise: + ∇J_func! = if !isnothing(hess) + function (∇J_arg::AbstractVector{T}, Z̃_arg::Vararg{T, N}) where {N, T<:Real} + update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) + return ∇J_arg .= ∇J + end + else + function (∇J_arg::AbstractVector{T}, Z̃_arg::Vararg{T, N}) where {N, T<:Real} + update_objective!(J, ∇J, Z̃_J, Z̃_arg) + return ∇J_arg .= ∇J + end + end + function ∇²J_func!(∇²J_arg::AbstractMatrix{T}, Z̃_arg::Vararg{T, N}) where {N, T<:Real} + update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) + return fill_lowertriangle!(∇²J_arg, ∇²J) + end + if !isnothing(hess) + @operator(optim, J_op, nZ̃, J_func, ∇J_func!, ∇²J_func!) + else + @operator(optim, J_op, nZ̃, J_func, ∇J_func!) + end + return J_op +end + +""" + get_nonlincon_oracle(estim::MovingHorizonEstimator, optim) -> g_oracle, geq_oracle + +Return the nonlinear constraint oracles for [`MovingHorizonEstimator`](@ref) `estim`. 
+ +Return `g_oracle`, the [`VectorNonlinearOracle`](@extref MathOptInterface MathOptInterface.VectorNonlinearOracle) +for the inequality constraints. Note that `g_oracle` only includes the non-`Inf` +inequality constraints, thus it must be re-constructed if they change. This method is really +intricate because the oracle is used inside the nonlinear optimization, so it must be +type-stable and as efficient as possible. All the function outputs and derivatives are +cached and updated in-place if required to use the efficient [`value_and_jacobian!`](@extref DifferentiationInterface DifferentiationInterface.value_and_jacobian!). +""" +function get_nonlincon_oracle( + estim::MovingHorizonEstimator, ::JuMP.GenericModel{JNT} +) where JNT<:Real + # ----------- common cache for all functions ---------------------------------------- + model, con = estim.model, estim.con + jac, hess = estim.jacobian, estim.hessian + nx̂, nym, nŷ, nu, nk = estim.nx̂, estim.nym, model.ny, model.nu, model.nk + He = estim.He i_g = findall(con.i_g) # convert to non-logical indices for non-allocating @views ng, ngi = length(con.i_g), sum(con.i_g) nV̂, nX̂, ng, nZ̃ = He*nym, He*nx̂, length(con.i_g), length(estim.Z̃) strict = Val(true) myNaN, myInf = convert(JNT, NaN), convert(JNT, Inf) - J::Vector{JNT} = zeros(JNT, 1) V̂::Vector{JNT}, X̂0::Vector{JNT} = zeros(JNT, nV̂), zeros(JNT, nX̂) k0::Vector{JNT} = zeros(JNT, nk) û0::Vector{JNT}, ŷ0::Vector{JNT} = zeros(JNT, nu), zeros(JNT, nŷ) g::Vector{JNT}, gi::Vector{JNT} = zeros(JNT, ng), zeros(JNT, ngi) - x̄::Vector{JNT} = zeros(JNT, nx̂) - # -------------- inequality constraint: nonlinear oracle ----------------------------- + λi::Vector{JNT} = ones(JNT, ngi) + # -------------- inequality constraint: nonlinear oracle ------------------------- function gi!(gi, Z̃, V̂, X̂0, û0, k0, ŷ0, g) update_prediction!(V̂, X̂0, û0, k0, ŷ0, g, estim, Z̃) gi .= @views g[i_g] return nothing end + function ℓ_gi(Z̃, λi, V̂, X̂0, û0, k0, ŷ0, g, gi) + update_prediction!(V̂, X̂0, û0, k0, ŷ0, g, estim, Z̃) + gi .= @views g[i_g] + return dot(λi, gi) + end Z̃_∇gi = fill(myNaN, nZ̃) # NaN to force update_predictions! at first call - ∇gi_context = ( + ∇gi_cache = ( Cache(V̂), Cache(X̂0), Cache(û0), Cache(k0), Cache(ŷ0), Cache(g) ) # temporarily "fill" the estimation window for the preparation of the gradient: estim.Nk[] = He - ∇gi_prep = prepare_jacobian(gi!, gi, jac, Z̃_∇gi, ∇gi_context...; strict) + ∇gi_prep = prepare_jacobian(gi!, gi, jac, Z̃_∇gi, ∇gi_cache...; strict) estim.Nk[] = 0 ∇gi = init_diffmat(JNT, jac, ∇gi_prep, nZ̃, ngi) ∇gi_structure = init_diffstructure(∇gi) + if !isnothing(hess) + ∇²gi_cache = ( + Cache(V̂), Cache(X̂0), Cache(û0), Cache(k0), Cache(ŷ0), Cache(g), Cache(gi) + ) + estim.Nk[] = He # see comment above + ∇²gi_prep = prepare_hessian( + ℓ_gi, hess, Z̃_∇gi, Constant(λi), ∇²gi_cache...; strict + ) + estim.Nk[] = 0 + ∇²ℓ_gi = init_diffmat(JNT, hess, ∇²gi_prep, nZ̃, nZ̃) + ∇²gi_structure = lowertriangle_indices(init_diffstructure(∇²ℓ_gi)) + end function update_con!(gi, ∇gi, Z̃_∇gi, Z̃_arg) if isdifferent(Z̃_arg, Z̃_∇gi) Z̃_∇gi .= Z̃_arg - value_and_jacobian!(gi!, gi, ∇gi, ∇gi_prep, jac, Z̃_∇gi, ∇gi_context...) + value_and_jacobian!(gi!, gi, ∇gi, ∇gi_prep, jac, Z̃_∇gi, ∇gi_cache...) 
end return nothing end - function gi_func!(gi_vec, Z̃_arg) + function gi_func!(gi_arg, Z̃_arg) update_con!(gi, ∇gi, Z̃_∇gi, Z̃_arg) - return gi_vec .= gi + return gi_arg .= gi end - function ∇gi_func!(∇gi_vec, Z̃_arg) + function ∇gi_func!(∇gi_arg, Z̃_arg) update_con!(gi, ∇gi, Z̃_∇gi, Z̃_arg) - return diffmat2vec!(∇gi_vec, ∇gi, ∇gi_structure) + return diffmat2vec!(∇gi_arg, ∇gi, ∇gi_structure) + end + function ∇²gi_func!(∇²ℓ_arg, Z̃_arg, λ_arg) + Z̃_∇gi .= Z̃_arg + λi .= λ_arg + hessian!(ℓ_gi, ∇²ℓ_gi, ∇²gi_prep, hess, Z̃_∇gi, Constant(λi), ∇²gi_cache...) + return diffmat2vec!(∇²ℓ_arg, ∇²ℓ_gi, ∇²gi_structure) end gi_min = fill(-myInf, ngi) gi_max = zeros(JNT, ngi) @@ -1396,42 +1541,11 @@ function get_nonlinops( u = gi_max, eval_f = gi_func!, jacobian_structure = ∇gi_structure, - eval_jacobian = ∇gi_func! + eval_jacobian = ∇gi_func!, + hessian_lagrangian_structure = isnothing(hess) ? Tuple{Int,Int}[] : ∇²gi_structure, + eval_hessian_lagrangian = isnothing(hess) ? nothing : ∇²gi_func! ) - # ------------- objective function: splatting syntax --------------------------------- - function J!(Z̃, V̂, X̂0, û0, k0, ŷ0, g, x̄) - update_prediction!(V̂, X̂0, û0, k0, ŷ0, g, estim, Z̃) - return obj_nonlinprog!(x̄, estim, model, V̂, Z̃) - end - Z̃_∇J = fill(myNaN, nZ̃) # NaN to force update_predictions! at first call - ∇J_context = ( - Cache(V̂), Cache(X̂0), Cache(û0), Cache(k0), Cache(ŷ0), - Cache(g), - Cache(x̄), - ) - # temporarily "fill" the estimation window for the preparation of the gradient: - estim.Nk[] = He - ∇J_prep = prepare_gradient(J!, grad, Z̃_∇J, ∇J_context...; strict) - estim.Nk[] = 0 - ∇J = Vector{JNT}(undef, nZ̃) - function update_objective!(J, ∇J, Z̃_∇J, Z̃_arg) - if isdifferent(Z̃_arg, Z̃_∇J) - Z̃_∇J .= Z̃_arg - J[], _ = value_and_gradient!(J!, ∇J, ∇J_prep, grad, Z̃_∇J, ∇J_context...) - end - end - function J_func(Z̃_arg::Vararg{T, N}) where {N, T<:Real} - update_objective!(J, ∇J, Z̃_∇J, Z̃_arg) - return J[]::T - end - function ∇J_func!(∇J_arg::AbstractVector{T}, Z̃_arg::Vararg{T, N}) where {N, T<:Real} - # only the multivariate syntax of JuMP.@operator, univariate is impossible for MHE - # since Z̃ comprises the arrival state estimate AND the estimated process noise - update_objective!(J, ∇J, Z̃_∇J, Z̃_arg) - return ∇J_arg .= ∇J - end - J_op = JuMP.add_nonlinear_operator(optim, nZ̃, J_func, ∇J_func!, name=:J_op) - g_oracle, J_op + return g_oracle end "By default, there is no nonlinear constraint, thus do nothing." diff --git a/src/general.jl b/src/general.jl index b608bb415..6845de7da 100644 --- a/src/general.jl +++ b/src/general.jl @@ -110,6 +110,30 @@ dense_backend(backend::AbstractADType) = backend dense_backend(backend::AutoSparse) = backend.dense_ad dense_backend(backend::SecondOrder) = backend.inner +"Validate `hessian` keyword argument and return the differentiation `backend`." +function validate_hessian(hessian, gradient, oracle, default) + if hessian == true + backend = default + elseif hessian == false || isnothing(hessian) + backend = nothing + else + backend = hessian + end + if oracle == false && !isnothing(backend) + error("Second order derivatives are only supported with oracle=true.") + end + if oracle == true && !isnothing(backend) + hess = dense_backend(backend) + grad = dense_backend(gradient) + if hess != grad + @info "The objective function gradient will be computed with the hessian "* + "backend ($(backend_str(hess)))\n instead of the one in gradient "* + "argument ($(backend_str(grad))) for efficiency." 
+ end + end + return backend +end + "Verify that x and y elements are different using `!==`." isdifferent(x, y) = any(xi !== yi for (xi, yi) in zip(x, y)) diff --git a/test/2_test_state_estim.jl b/test/2_test_state_estim.jl index 6dcb3bc0e..892887889 100644 --- a/test/2_test_state_estim.jl +++ b/test/2_test_state_estim.jl @@ -895,10 +895,14 @@ end @test isa(mhe13, MovingHorizonEstimator{Float32}) mhe14 = MovingHorizonEstimator( - nonlinmodel, He=5, gradient=AutoFiniteDiff(), jacobian=AutoFiniteDiff() + nonlinmodel, He=5, + gradient=AutoFiniteDiff(), + jacobian=AutoFiniteDiff(), + hessian=AutoFiniteDiff() ) @test mhe14.gradient == AutoFiniteDiff() @test mhe14.jacobian == AutoFiniteDiff() + @test mhe14.hessian == AutoFiniteDiff() @test_throws ArgumentError MovingHorizonEstimator(linmodel) @test_throws ArgumentError MovingHorizonEstimator(linmodel, He=0) @@ -952,30 +956,39 @@ end preparestate!(mhe1, [51, 32], [5]) @test mhe1([5]) ≈ [51, 32] atol=1e-3 - mhe1 = MovingHorizonEstimator(nonlinmodel, He=2, nint_u=[1, 1], nint_ym=[0, 0], direct=false) - JuMP.set_attribute(mhe1.optim, "tol", 1e-7) - preparestate!(mhe1, [50, 30], [5]) - x̂ = updatestate!(mhe1, [10, 50], [50, 30], [5]) + mhe1b = MovingHorizonEstimator(nonlinmodel, He=2, nint_u=[1, 1], nint_ym=0, direct=false) + JuMP.set_attribute(mhe1b.optim, "tol", 1e-7) + preparestate!(mhe1b, [50, 30], [5]) + x̂ = updatestate!(mhe1b, [10, 50], [50, 30], [5]) @test x̂ ≈ zeros(6) atol=1e-9 - @test mhe1.x̂0 ≈ zeros(6) atol=1e-9 - info = getinfo(mhe1) + @test mhe1b.x̂0 ≈ zeros(6) atol=1e-9 + info = getinfo(mhe1b) @test info[:x̂] ≈ x̂ atol=1e-9 @test info[:Ŷ][end-1:end] ≈ [50, 30] atol=1e-9 - @test initstate!(mhe1, [11, 52], [50, 30], [5]) ≈ zeros(6) atol=1e-9 - @test mhe1.lastu0 ≈ [1, 2] - setstate!(mhe1, [1,2,3,4,5,6]) - @test mhe1.x̂0 ≈ [1,2,3,4,5,6] + mhe1c = MovingHorizonEstimator(nonlinmodel, He=2, direct=false, hessian=true) + preparestate!(mhe1c, [50, 30], [5]) + x̂ = updatestate!(mhe1c, [10, 50], [50, 30], [5]) + @test x̂ ≈ zeros(6) atol=1e-9 + @test mhe1c.x̂0 ≈ zeros(6) atol=1e-9 + info = getinfo(mhe1c) + @test info[:x̂] ≈ x̂ atol=1e-9 + @test info[:Ŷ][end-1:end] ≈ [50, 30] atol=1e-9 + + @test initstate!(mhe1c, [11, 52], [50, 30], [5]) ≈ zeros(6) atol=1e-9 + @test mhe1c.lastu0 ≈ [1, 2] + setstate!(mhe1c, [1,2,3,4,5,6]) + @test mhe1c.x̂0 ≈ [1,2,3,4,5,6] for i in 1:40 - preparestate!(mhe1, [50, 30], [5]) - updatestate!(mhe1, [11, 52], [50, 30], [5]) + preparestate!(mhe1c, [50, 30], [5]) + updatestate!(mhe1c, [11, 52], [50, 30], [5]) end - @test mhe1([5]) ≈ [50, 30] atol=1e-3 + @test mhe1c([5]) ≈ [50, 30] atol=1e-3 for i in 1:40 - preparestate!(mhe1, [51, 32], [5]) - updatestate!(mhe1, [10, 50], [51, 32], [5]) + preparestate!(mhe1c, [51, 32], [5]) + updatestate!(mhe1c, [10, 50], [51, 32], [5]) end - @test mhe1([5]) ≈ [51, 32] atol=1e-3 + @test mhe1c([5]) ≈ [51, 32] atol=1e-3 mhe2 = MovingHorizonEstimator(linmodel, He=2) preparestate!(mhe2, [50, 30], [5])