diff --git a/Project.toml b/Project.toml index 4a369fbba..9447b49a6 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "ModelPredictiveControl" uuid = "61f9bdb8-6ae4-484a-811f-bbf86720c31c" -version = "1.12.0" +version = "1.13.0" authors = ["Francis Gagnon"] [deps] diff --git a/benchmark/2_bench_state_estim.jl b/benchmark/2_bench_state_estim.jl index 25dc87042..d49005861 100644 --- a/benchmark/2_bench_state_estim.jl +++ b/benchmark/2_bench_state_estim.jl @@ -305,6 +305,13 @@ mhe_pendulum_ipopt_curr = setconstraint!(mhe_pendulum_ipopt_curr; v̂min, v̂max JuMP.unset_time_limit_sec(mhe_pendulum_ipopt_curr.optim) JuMP.set_attribute(mhe_pendulum_ipopt_curr.optim, "tol", 1e-7) +hessian = true +mhe_pendulum_ipopt_currh = MovingHorizonEstimator( + model; He, σQ, σR, nint_u, σQint_u, optim, direct, hessian +) +mhe_pendulum_ipopt_currh = setconstraint!(mhe_pendulum_ipopt_currh; v̂min, v̂max) +JuMP.unset_time_limit_sec(mhe_pendulum_ipopt_currh.optim) + optim = JuMP.Model(optimizer_with_attributes(Ipopt.Optimizer,"sb"=>"yes"), add_bridges=false) direct = false mhe_pendulum_ipopt_pred = MovingHorizonEstimator( @@ -314,6 +321,14 @@ mhe_pendulum_ipopt_pred = setconstraint!(mhe_pendulum_ipopt_pred; v̂min, v̂max JuMP.unset_time_limit_sec(mhe_pendulum_ipopt_pred.optim) JuMP.set_attribute(mhe_pendulum_ipopt_pred.optim, "tol", 1e-7) +hessian = true +mhe_pendulum_ipopt_predh = MovingHorizonEstimator( + model; He, σQ, σR, nint_u, σQint_u, optim, direct, hessian +) +mhe_pendulum_ipopt_predh = setconstraint!(mhe_pendulum_ipopt_predh; v̂min, v̂max) +JuMP.unset_time_limit_sec(mhe_pendulum_ipopt_predh.optim) +JuMP.set_attribute(mhe_pendulum_ipopt_predh.optim, "tol", 1e-7) + optim = JuMP.Model(MadNLP.Optimizer, add_bridges=false) direct = true mhe_pendulum_madnlp_curr = MovingHorizonEstimator( @@ -338,11 +353,21 @@ CASE_ESTIM["Pendulum"]["MovingHorizonEstimator"]["Ipopt"]["Current form"] = sim!($mhe_pendulum_ipopt_curr, $N, $u; plant=$plant, x_0=$x_0, x̂_0=$x̂_0, progress=false), samples=samples, evals=evals, seconds=seconds ) +CASE_ESTIM["Pendulum"]["MovingHorizonEstimator"]["Ipopt"]["Current form (Hessian)"] = + @benchmarkable( + sim!($mhe_pendulum_ipopt_currh, $N, $u; plant=$plant, x_0=$x_0, x̂_0=$x̂_0, progress=false), + samples=samples, evals=evals, seconds=seconds + ) CASE_ESTIM["Pendulum"]["MovingHorizonEstimator"]["Ipopt"]["Prediction form"] = @benchmarkable( sim!($mhe_pendulum_ipopt_pred, $N, $u; plant=$plant, x_0=$x_0, x̂_0=$x̂_0, progress=false), samples=samples, evals=evals, seconds=seconds ) +CASE_ESTIM["Pendulum"]["MovingHorizonEstimator"]["Ipopt"]["Prediction form (Hessian)"] = + @benchmarkable( + sim!($mhe_pendulum_ipopt_predh, $N, $u; plant=$plant, x_0=$x_0, x̂_0=$x̂_0, progress=false), + samples=samples, evals=evals, seconds=seconds + ) CASE_ESTIM["Pendulum"]["MovingHorizonEstimator"]["MadNLP"]["Current form"] = @benchmarkable( sim!($mhe_pendulum_madnlp_curr, $N, $u; plant=$plant, x_0=$x_0, x̂_0=$x̂_0, progress=false), diff --git a/benchmark/3_bench_predictive_control.jl b/benchmark/3_bench_predictive_control.jl index 427f2e7e6..59b99ffdf 100644 --- a/benchmark/3_bench_predictive_control.jl +++ b/benchmark/3_bench_predictive_control.jl @@ -318,7 +318,13 @@ optim = JuMP.Model(MadNLP.Optimizer, add_bridges=false) transcription = SingleShooting() nmpc_madnlp_ss = NonLinMPC(estim; Hp, Hc, Mwt, Nwt, Cwt, optim, transcription) nmpc_madnlp_ss = setconstraint!(nmpc_madnlp_ss; umin, umax) -JuMP.unset_time_limit_sec(nmpc_madnlp_ss.optim) +JuMP.unset_time_limit_sec(nmpc_madnlp_ss.optim) + 
+optim = JuMP.Model(()->UnoSolver.Optimizer(preset="filtersqp"), add_bridges=false) +transcription, hessian, oracle = MultipleShooting(), true, true +nmpc_uno_ms_hess = NonLinMPC(estim; Hp, Hc, Mwt, Nwt, Cwt, optim, transcription, hessian, oracle) +nmpc_uno_ms_hess = setconstraint!(nmpc_uno_ms_hess; umin, umax) +JuMP.unset_time_limit_sec(nmpc_uno_ms_hess.optim) # TODO: does not work well with MadNLP and MultipleShooting or TrapezoidalCollocation, # figure out why. Current theory: @@ -375,6 +381,11 @@ CASE_MPC["Pendulum"]["NonLinMPC"]["Noneconomic"]["MadNLP"]["SingleShooting"] = sim!($nmpc_madnlp_ss, $N, $ry; plant=$plant, x_0=$x_0, x̂_0=$x̂_0, progress=false), samples=samples, evals=evals, seconds=seconds ) +CASE_MPC["Pendulum"]["NonLinMPC"]["Noneconomic"]["Uno"]["MultipleShooting (Hessian)"] = + @benchmarkable( + sim!($nmpc_uno_ms_hess, $N, $ry; plant=$plant, x_0=$x_0, x̂_0=$x̂_0, progress=false), + samples=samples, evals=evals, seconds=seconds + ) # ----------------- Case study: Pendulum economic -------------------------------- model2, p = pendulum_model2, pendulum_p2 diff --git a/benchmark/Project.toml b/benchmark/Project.toml index 282e1fcac..0aa81c0ee 100644 --- a/benchmark/Project.toml +++ b/benchmark/Project.toml @@ -13,3 +13,4 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" SparseConnectivityTracer = "9f842d2f-2579-4b1d-911e-f412cf18a3f5" SparseMatrixColorings = "0a514795-09f3-496d-8182-132a7b665d35" +UnoSolver = "1baa60ac-02f7-4b39-a7a8-2f4f58486b05" diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index 4a83ede08..914ff804a 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -1,6 +1,6 @@ using BenchmarkTools using ModelPredictiveControl, ControlSystemsBase, LinearAlgebra -using JuMP, OSQP, DAQP, Ipopt, MadNLP +using JuMP, OSQP, DAQP, Ipopt, MadNLP, UnoSolver const SUITE = BenchmarkGroup(["ModelPredictiveControl"]) diff --git a/docs/src/internals/state_estim.md b/docs/src/internals/state_estim.md index caba31010..6b92cc2d7 100644 --- a/docs/src/internals/state_estim.md +++ b/docs/src/internals/state_estim.md @@ -18,7 +18,8 @@ ModelPredictiveControl.relaxX̂ ModelPredictiveControl.relaxŴ ModelPredictiveControl.relaxV̂ ModelPredictiveControl.init_matconstraint_mhe -ModelPredictiveControl.get_nonlinops(::MovingHorizonEstimator, ::ModelPredictiveControl.GenericModel) +ModelPredictiveControl.get_nonlinobj_op(::MovingHorizonEstimator, ::ModelPredictiveControl.GenericModel) +ModelPredictiveControl.get_nonlincon_oracle(::MovingHorizonEstimator, ::ModelPredictiveControl.GenericModel) ``` ## Augmented Model diff --git a/src/controller/nonlinmpc.jl b/src/controller/nonlinmpc.jl index cabafde4c..987be9ecc 100644 --- a/src/controller/nonlinmpc.jl +++ b/src/controller/nonlinmpc.jl @@ -222,11 +222,11 @@ This controller allocates memory at each time step for the optimization. function, see [`DifferentiationInterface` doc](@extref DifferentiationInterface List). - `jacobian=default_jacobian(transcription)` : an `AbstractADType` backend for the Jacobian of the nonlinear constraints, see `gradient` above for the options (default in Extended Help). -- `hessian=false` : an `AbstractADType` backend for the Hessian of the Lagrangian, see - `gradient` above for the options. The default `false` skip it and use the quasi-Newton - method of `optim`, which is always the case if `oracle=false` (see Extended Help). 
-- `oracle=JuMP.solver_name(optim)=="Ipopt"`: use the efficient [`VectorNonlinearOracle`](@extref MathOptInterface MathOptInterface.VectorNonlinearOracle) - for the nonlinear constraints (not supported by most optimizers for now). +- `hessian=false` : an `AbstractADType` backend or `Bool` for the Hessian of the Lagrangian, + see `gradient` above for the options. The default `false` skip it and use the quasi-Newton + method of `optim`, which is always the case if `oracle=false` (see Extended Help). +- `oracle=JuMP.solver_name(optim)=="Ipopt"` : a `Bool` to use the [`VectorNonlinearOracle`](@extref MathOptInterface MathOptInterface.VectorNonlinearOracle) + for efficient nonlinear constraints (not supported by most optimizers for now). - additional keyword arguments are passed to [`UnscentedKalmanFilter`](@ref) constructor (or [`SteadyKalmanFilter`](@ref), for [`LinModel`](@ref)). @@ -409,7 +409,7 @@ function NonLinMPC( validate_JE(NT, JE) gc! = get_mutating_gc(NT, gc) weights = ControllerWeights(estim.model, Hp, Hc, M_Hp, N_Hc, L_Hp, Cwt, Ewt) - hessian = validate_hessian(hessian, gradient, oracle) + hessian = validate_hessian(hessian, gradient, oracle, DEFAULT_NONLINMPC_HESSIAN) return NonLinMPC{NT}( estim, Hp, Hc, nb, weights, JE, gc!, nc, p, transcription, optim, gradient, jacobian, hessian, oracle @@ -512,34 +512,6 @@ function test_custom_functions(NT, model::SimModel, JE, gc!, nc, Uop, Yop, Dop, return nothing end -""" - validate_hessian(hessian, gradient, oracle) -> backend - -Validate `hessian` argument and return the differentiation backend. -""" -function validate_hessian(hessian, gradient, oracle) - if hessian == true - backend = DEFAULT_NONLINMPC_HESSIAN - elseif hessian == false || isnothing(hessian) - backend = nothing - else - backend = hessian - end - if oracle == false && !isnothing(backend) - error("Second order derivatives are only supported with oracle=true.") - end - if oracle == true && !isnothing(backend) - hess = dense_backend(backend) - grad = dense_backend(gradient) - if hess != grad - @info "The objective function gradient will be computed with the hessian "* - "backend ($(backend_str(hess)))\n instead of the one in gradient "* - "argument ($(backend_str(grad))) for efficiency." - end - end - return backend -end - """ addinfo!(info, mpc::NonLinMPC) -> info @@ -626,6 +598,134 @@ function reset_nonlincon!(mpc::NonLinMPC) end end +""" + get_nonlinobj_op(mpc::NonLinMPC, optim::JuMP.GenericModel{JNT}) -> J_op + +Return the nonlinear operator for the objective of `mpc` [`NonLinMPC`](@ref). + +It is based on the splatting syntax. This method is really intricate and that's because of: + +- These functions are used inside the nonlinear optimization, so they must be type-stable + and as efficient as possible. All the function outputs and derivatives are cached and + updated in-place if required to use the efficient [`value_and_gradient!`](@extref DifferentiationInterface DifferentiationInterface.value_and_jacobian!). +- The splatting syntax for objective functions implies the use of `Vararg{T,N}` (see the [performance tip](@extref Julia Be-aware-of-when-Julia-avoids-specializing)) + and memoization to avoid redundant computations. This is already complex, but it's even + worse knowing that the automatic differentiation tools do not support splatting. +- The signature of gradient and hessian functions is not the same for univariate (`nZ̃ == 1`) + and multivariate (`nZ̃ > 1`) operators in `JuMP`. Both must be defined. 
+""" +function get_nonlinobj_op(mpc::NonLinMPC, optim::JuMP.GenericModel{JNT}) where JNT<:Real + model = mpc.estim.model + transcription = mpc.transcription + grad, hess = mpc.gradient, mpc.hessian + nu, ny, nx̂, nϵ = model.nu, model.ny, mpc.estim.nx̂, mpc.nϵ + nk = get_nk(model, transcription) + Hp, Hc = mpc.Hp, mpc.Hc + ng = length(mpc.con.i_g) + nc, neq = mpc.con.nc, mpc.con.neq + nZ̃, nU, nŶ, nX̂, nK = length(mpc.Z̃), Hp*nu, Hp*ny, Hp*nx̂, Hp*nk + nΔŨ, nUe, nŶe = nu*Hc + nϵ, nU + nu, nŶ + ny + strict = Val(true) + myNaN = convert(JNT, NaN) + J::Vector{JNT} = zeros(JNT, 1) + ΔŨ::Vector{JNT} = zeros(JNT, nΔŨ) + x̂0end::Vector{JNT} = zeros(JNT, nx̂) + K0::Vector{JNT} = zeros(JNT, nK) + Ue::Vector{JNT}, Ŷe::Vector{JNT} = zeros(JNT, nUe), zeros(JNT, nŶe) + U0::Vector{JNT}, Ŷ0::Vector{JNT} = zeros(JNT, nU), zeros(JNT, nŶ) + Û0::Vector{JNT}, X̂0::Vector{JNT} = zeros(JNT, nU), zeros(JNT, nX̂) + gc::Vector{JNT}, g::Vector{JNT} = zeros(JNT, nc), zeros(JNT, ng) + geq::Vector{JNT} = zeros(JNT, neq) + function J!(Z̃, ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, g, geq) + update_predictions!(ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, g, geq, mpc, Z̃) + return obj_nonlinprog!(Ŷ0, U0, mpc, model, Ue, Ŷe, ΔŨ) + end + Z̃_J = fill(myNaN, nZ̃) # NaN to force update at first call + J_cache = ( + Cache(ΔŨ), Cache(x̂0end), Cache(Ue), Cache(Ŷe), Cache(U0), Cache(Ŷ0), + Cache(Û0), Cache(K0), Cache(X̂0), + Cache(gc), Cache(g), Cache(geq), + ) + ∇J_prep = prepare_gradient(J!, grad, Z̃_J, J_cache...; strict) + ∇J = Vector{JNT}(undef, nZ̃) + if !isnothing(hess) + ∇²J_prep = prepare_hessian(J!, hess, Z̃_J, J_cache...; strict) + ∇²J = init_diffmat(JNT, hess, ∇²J_prep, nZ̃, nZ̃) + end + update_objective! = if !isnothing(hess) + function (J, ∇J, ∇²J, Z̃_J, Z̃_arg) + if isdifferent(Z̃_arg, Z̃_J) + Z̃_J .= Z̃_arg + J[], _ = value_gradient_and_hessian!( + J!, ∇J, ∇²J, ∇²J_prep, hess, Z̃_J, J_cache... + ) + end + end + else + update_objective! = function (J, ∇J, Z̃_∇J, Z̃_arg) + if isdifferent(Z̃_arg, Z̃_∇J) + Z̃_∇J .= Z̃_arg + J[], _ = value_and_gradient!( + J!, ∇J, ∇J_prep, grad, Z̃_∇J, J_cache... + ) + end + end + end + J_func = if !isnothing(hess) + function (Z̃_arg::Vararg{T, N}) where {N, T<:Real} + update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) + return J[]::T + end + else + function (Z̃_arg::Vararg{T, N}) where {N, T<:Real} + update_objective!(J, ∇J, Z̃_J, Z̃_arg) + return J[]::T + end + end + ∇J_func! = if nZ̃ == 1 # univariate syntax (see JuMP.@operator doc): + if !isnothing(hess) + function (Z̃_arg) + update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) + return ∇J[] + end + else + function (Z̃_arg) + update_objective!(J, ∇J, Z̃_J, Z̃_arg) + return ∇J[] + end + end + else # multivariate syntax (see JuMP.@operator doc): + if !isnothing(hess) + function (∇J_arg::AbstractVector{T}, Z̃_arg::Vararg{T, N}) where {N, T<:Real} + update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) + return ∇J_arg .= ∇J + end + else + function (∇J_arg::AbstractVector{T}, Z̃_arg::Vararg{T, N}) where {N, T<:Real} + update_objective!(J, ∇J, Z̃_J, Z̃_arg) + return ∇J_arg .= ∇J + end + end + end + ∇²J_func! 
= if nZ̃ == 1 # univariate syntax (see JuMP.@operator doc): + function (Z̃_arg) + update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) + return ∇²J[] + end + else # multivariate syntax (see JuMP.@operator doc): + function (∇²J_arg::AbstractMatrix{T}, Z̃_arg::Vararg{T, N}) where {N, T<:Real} + update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) + return fill_lowertriangle!(∇²J_arg, ∇²J) + end + end + if !isnothing(hess) + @operator(optim, J_op, nZ̃, J_func, ∇J_func!, ∇²J_func!) + else + @operator(optim, J_op, nZ̃, J_func, ∇J_func!) + end + return J_op +end + """ get_nonlincon_oracle(mpc::NonLinMPC, optim) -> g_oracle, geq_oracle @@ -661,7 +761,7 @@ function get_nonlincon_oracle(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JN Û0::Vector{JNT}, X̂0::Vector{JNT} = zeros(JNT, nU), zeros(JNT, nX̂) gc::Vector{JNT}, g::Vector{JNT} = zeros(JNT, nc), zeros(JNT, ng) gi::Vector{JNT}, geq::Vector{JNT} = zeros(JNT, ngi), zeros(JNT, neq) - λi::Vector{JNT}, λeq::Vector{JNT} = ones(JNT, ngi), ones(JNT, neq) + λi::Vector{JNT}, λeq::Vector{JNT} = ones(JNT, ngi), ones(JNT, neq) # -------------- inequality constraint: nonlinear oracle ----------------------------- function gi!(gi, Z̃, ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, geq, g) update_predictions!(ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, g, geq, mpc, Z̃) @@ -674,22 +774,22 @@ function get_nonlincon_oracle(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JN return dot(λi, gi) end Z̃_∇gi = fill(myNaN, nZ̃) # NaN to force update at first call - ∇gi_context = ( + ∇gi_cache = ( Cache(ΔŨ), Cache(x̂0end), Cache(Ue), Cache(Ŷe), Cache(U0), Cache(Ŷ0), Cache(Û0), Cache(K0), Cache(X̂0), Cache(gc), Cache(geq), Cache(g) ) - ∇gi_prep = prepare_jacobian(gi!, gi, jac, Z̃_∇gi, ∇gi_context...; strict) + ∇gi_prep = prepare_jacobian(gi!, gi, jac, Z̃_∇gi, ∇gi_cache...; strict) ∇gi = init_diffmat(JNT, jac, ∇gi_prep, nZ̃, ngi) ∇gi_structure = init_diffstructure(∇gi) if !isnothing(hess) - ∇²gi_context = ( + ∇²gi_cache = ( Cache(ΔŨ), Cache(x̂0end), Cache(Ue), Cache(Ŷe), Cache(U0), Cache(Ŷ0), Cache(Û0), Cache(K0), Cache(X̂0), Cache(gc), Cache(geq), Cache(g), Cache(gi) ) ∇²gi_prep = prepare_hessian( - ℓ_gi, hess, Z̃_∇gi, Constant(λi), ∇²gi_context...; strict + ℓ_gi, hess, Z̃_∇gi, Constant(λi), ∇²gi_cache...; strict ) ∇²ℓ_gi = init_diffmat(JNT, hess, ∇²gi_prep, nZ̃, nZ̃) ∇²gi_structure = lowertriangle_indices(init_diffstructure(∇²ℓ_gi)) @@ -697,7 +797,7 @@ function get_nonlincon_oracle(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JN function update_con!(gi, ∇gi, Z̃_∇gi, Z̃_arg) if isdifferent(Z̃_arg, Z̃_∇gi) Z̃_∇gi .= Z̃_arg - value_and_jacobian!(gi!, gi, ∇gi, ∇gi_prep, jac, Z̃_∇gi, ∇gi_context...) + value_and_jacobian!(gi!, gi, ∇gi, ∇gi_prep, jac, Z̃_∇gi, ∇gi_cache...) end return nothing end @@ -712,7 +812,7 @@ function get_nonlincon_oracle(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JN function ∇²gi_func!(∇²ℓ_arg, Z̃_arg, λ_arg) Z̃_∇gi .= Z̃_arg λi .= λ_arg - hessian!(ℓ_gi, ∇²ℓ_gi, ∇²gi_prep, hess, Z̃_∇gi, Constant(λi), ∇²gi_context...) + hessian!(ℓ_gi, ∇²ℓ_gi, ∇²gi_prep, hess, Z̃_∇gi, Constant(λi), ∇²gi_cache...) return diffmat2vec!(∇²ℓ_arg, ∇²ℓ_gi, ∇²gi_structure) end gi_min = fill(-myInf, ngi) @@ -725,7 +825,7 @@ function get_nonlincon_oracle(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JN jacobian_structure = ∇gi_structure, eval_jacobian = ∇gi_func!, hessian_lagrangian_structure = isnothing(hess) ? Tuple{Int,Int}[] : ∇²gi_structure, - eval_hessian_lagrangian = isnothing(hess) ? nothing : ∇²gi_func! + eval_hessian_lagrangian = isnothing(hess) ? nothing : ∇²gi_func! 
) # ------------- equality constraints : nonlinear oracle ------------------------------ function geq!(geq, Z̃, ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, g) @@ -737,22 +837,22 @@ function get_nonlincon_oracle(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JN return dot(λeq, geq) end Z̃_∇geq = fill(myNaN, nZ̃) # NaN to force update at first call - ∇geq_context = ( + ∇geq_cache = ( Cache(ΔŨ), Cache(x̂0end), Cache(Ue), Cache(Ŷe), Cache(U0), Cache(Ŷ0), Cache(Û0), Cache(K0), Cache(X̂0), Cache(gc), Cache(g) ) - ∇geq_prep = prepare_jacobian(geq!, geq, jac, Z̃_∇geq, ∇geq_context...; strict) + ∇geq_prep = prepare_jacobian(geq!, geq, jac, Z̃_∇geq, ∇geq_cache...; strict) ∇geq = init_diffmat(JNT, jac, ∇geq_prep, nZ̃, neq) ∇geq_structure = init_diffstructure(∇geq) if !isnothing(hess) - ∇²geq_context = ( + ∇²geq_cache = ( Cache(ΔŨ), Cache(x̂0end), Cache(Ue), Cache(Ŷe), Cache(U0), Cache(Ŷ0), Cache(Û0), Cache(K0), Cache(X̂0), Cache(gc), Cache(geq), Cache(g) ) ∇²geq_prep = prepare_hessian( - ℓ_geq, hess, Z̃_∇geq, Constant(λeq), ∇²geq_context...; strict + ℓ_geq, hess, Z̃_∇geq, Constant(λeq), ∇²geq_cache...; strict ) ∇²ℓ_geq = init_diffmat(JNT, hess, ∇²geq_prep, nZ̃, nZ̃) ∇²geq_structure = lowertriangle_indices(init_diffstructure(∇²ℓ_geq)) @@ -760,7 +860,7 @@ function get_nonlincon_oracle(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JN function update_con_eq!(geq, ∇geq, Z̃_∇geq, Z̃_arg) if isdifferent(Z̃_arg, Z̃_∇geq) Z̃_∇geq .= Z̃_arg - value_and_jacobian!(geq!, geq, ∇geq, ∇geq_prep, jac, Z̃_∇geq, ∇geq_context...) + value_and_jacobian!(geq!, geq, ∇geq, ∇geq_prep, jac, Z̃_∇geq, ∇geq_cache...) end return nothing end @@ -775,7 +875,7 @@ function get_nonlincon_oracle(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JN function ∇²geq_func!(∇²ℓ_arg, Z̃_arg, λ_arg) Z̃_∇geq .= Z̃_arg λeq .= λ_arg - hessian!(ℓ_geq, ∇²ℓ_geq, ∇²geq_prep, hess, Z̃_∇geq, Constant(λeq), ∇²geq_context...) + hessian!(ℓ_geq, ∇²ℓ_geq, ∇²geq_prep, hess, Z̃_∇geq, Constant(λeq), ∇²geq_cache...) return diffmat2vec!(∇²ℓ_arg, ∇²ℓ_geq, ∇²geq_structure) end geq_min = geq_max = zeros(JNT, neq) @@ -792,130 +892,6 @@ function get_nonlincon_oracle(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JN return g_oracle, geq_oracle end -""" - get_nonlinobj_op(mpc::NonLinMPC, optim::JuMP.GenericModel{JNT}) -> J_op - -Return the nonlinear operator for the objective function of `mpc` [`NonLinMPC`](@ref). - -It is based on the splatting syntax. This method is really intricate and that's because of: - -- These functions are used inside the nonlinear optimization, so they must be type-stable - and as efficient as possible. All the function outputs and derivatives are cached and - updated in-place if required to use the efficient [`value_and_gradient!`](@extref DifferentiationInterface DifferentiationInterface.value_and_jacobian!). -- The splatting syntax for objective functions implies the use of `Vararg{T,N}` (see the [performance tip](@extref Julia Be-aware-of-when-Julia-avoids-specializing)) - and memoization to avoid redundant computations. This is already complex, but it's even - worse knowing that the automatic differentiation tools do not support splatting. -- The signature of gradient and hessian functions is not the same for univariate (`nZ̃ == 1`) - and multivariate (`nZ̃ > 1`) operators in `JuMP`. Both must be defined. 
-""" -function get_nonlinobj_op(mpc::NonLinMPC, optim::JuMP.GenericModel{JNT}) where JNT<:Real - model = mpc.estim.model - transcription = mpc.transcription - grad, hess = mpc.gradient, mpc.hessian - nu, ny, nx̂, nϵ = model.nu, model.ny, mpc.estim.nx̂, mpc.nϵ - nk = get_nk(model, transcription) - Hp, Hc = mpc.Hp, mpc.Hc - ng = length(mpc.con.i_g) - nc, neq = mpc.con.nc, mpc.con.neq - nZ̃, nU, nŶ, nX̂, nK = length(mpc.Z̃), Hp*nu, Hp*ny, Hp*nx̂, Hp*nk - nΔŨ, nUe, nŶe = nu*Hc + nϵ, nU + nu, nŶ + ny - strict = Val(true) - myNaN = convert(JNT, NaN) - J::Vector{JNT} = zeros(JNT, 1) - ΔŨ::Vector{JNT} = zeros(JNT, nΔŨ) - x̂0end::Vector{JNT} = zeros(JNT, nx̂) - K0::Vector{JNT} = zeros(JNT, nK) - Ue::Vector{JNT}, Ŷe::Vector{JNT} = zeros(JNT, nUe), zeros(JNT, nŶe) - U0::Vector{JNT}, Ŷ0::Vector{JNT} = zeros(JNT, nU), zeros(JNT, nŶ) - Û0::Vector{JNT}, X̂0::Vector{JNT} = zeros(JNT, nU), zeros(JNT, nX̂) - gc::Vector{JNT}, g::Vector{JNT} = zeros(JNT, nc), zeros(JNT, ng) - geq::Vector{JNT} = zeros(JNT, neq) - function J!(Z̃, ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, g, geq) - update_predictions!(ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, g, geq, mpc, Z̃) - return obj_nonlinprog!(Ŷ0, U0, mpc, model, Ue, Ŷe, ΔŨ) - end - Z̃_J = fill(myNaN, nZ̃) # NaN to force update at first call - J_context = ( - Cache(ΔŨ), Cache(x̂0end), Cache(Ue), Cache(Ŷe), Cache(U0), Cache(Ŷ0), - Cache(Û0), Cache(K0), Cache(X̂0), - Cache(gc), Cache(g), Cache(geq), - ) - ∇J_prep = prepare_gradient(J!, grad, Z̃_J, J_context...; strict) - ∇J = Vector{JNT}(undef, nZ̃) - if !isnothing(hess) - ∇²J_prep = prepare_hessian(J!, hess, Z̃_J, J_context...; strict) - ∇²J = init_diffmat(JNT, hess, ∇²J_prep, nZ̃, nZ̃) - end - update_objective! = if !isnothing(hess) - function (J, ∇J, ∇²J, Z̃_J, Z̃_arg) - if isdifferent(Z̃_arg, Z̃_J) - Z̃_J .= Z̃_arg - J[], _ = value_gradient_and_hessian!(J!, ∇J, ∇²J, hess, Z̃_J, J_context...) - end - end - else - update_objective! = function (J, ∇J, Z̃_∇J, Z̃_arg) - if isdifferent(Z̃_arg, Z̃_∇J) - Z̃_∇J .= Z̃_arg - J[], _ = value_and_gradient!(J!, ∇J, ∇J_prep, grad, Z̃_∇J, J_context...) - end - end - end - J_func = if !isnothing(hess) - function (Z̃_arg::Vararg{T, N}) where {N, T<:Real} - update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) - return J[]::T - end - else - function (Z̃_arg::Vararg{T, N}) where {N, T<:Real} - update_objective!(J, ∇J, Z̃_J, Z̃_arg) - return J[]::T - end - end - ∇J_func! = if nZ̃ == 1 # univariate syntax (see JuMP.@operator doc): - if !isnothing(hess) - function (Z̃_arg) - update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) - return ∇J[] - end - else - function (Z̃_arg) - update_objective!(J, ∇J, Z̃_J, Z̃_arg) - return ∇J[] - end - end - else # multivariate syntax (see JuMP.@operator doc): - if !isnothing(hess) - function (∇J_arg::AbstractVector{T}, Z̃_arg::Vararg{T, N}) where {N, T<:Real} - update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) - return ∇J_arg .= ∇J - end - else - function (∇J_arg::AbstractVector{T}, Z̃_arg::Vararg{T, N}) where {N, T<:Real} - update_objective!(J, ∇J, Z̃_J, Z̃_arg) - return ∇J_arg .= ∇J - end - end - end - ∇²J_func! = if nZ̃ == 1 # univariate syntax (see JuMP.@operator doc): - function (Z̃_arg) - update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) - return ∇²J[] - end - else # multivariate syntax (see JuMP.@operator doc): - function (∇²J_arg::AbstractMatrix{T}, Z̃_arg::Vararg{T, N}) where {N, T<:Real} - update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) - return fill_lowertriangle!(∇²J_arg, ∇²J) - end - end - if !isnothing(hess) - @operator(optim, J_op, nZ̃, J_func, ∇J_func!, ∇²J_func!) 
- else - @operator(optim, J_op, nZ̃, J_func, ∇J_func!) - end - return J_op -end - """ update_predictions!( ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, g, geq, diff --git a/src/estimator/mhe.jl b/src/estimator/mhe.jl index 008a490e4..f466569b4 100644 --- a/src/estimator/mhe.jl +++ b/src/estimator/mhe.jl @@ -13,6 +13,7 @@ end function print_backends(io::IO, estim::MovingHorizonEstimator, ::SimModel) println(io, "├ gradient: $(backend_str(estim.gradient))") println(io, "├ jacobian: $(backend_str(estim.jacobian))") + println(io, "├ hessian: $(backend_str(estim.hessian))") end "No differentiation backends to print for `LinModel`." print_backends(::IO, ::MovingHorizonEstimator, ::LinModel) = nothing diff --git a/src/estimator/mhe/construct.jl b/src/estimator/mhe/construct.jl index 40474f426..ec92fc199 100644 --- a/src/estimator/mhe/construct.jl +++ b/src/estimator/mhe/construct.jl @@ -2,6 +2,11 @@ const DEFAULT_LINMHE_OPTIMIZER = OSQP.MathOptInterfaceOSQP.Optimizer const DEFAULT_NONLINMHE_OPTIMIZER = optimizer_with_attributes(Ipopt.Optimizer,"sb"=>"yes") const DEFAULT_NONLINMHE_GRADIENT = AutoForwardDiff() const DEFAULT_NONLINMHE_JACOBIAN = AutoForwardDiff() +const DEFAULT_NONLINMHE_HESSIAN = AutoSparse( + AutoForwardDiff(); + sparsity_detector=TracerSparsityDetector(), + coloring_algorithm=GreedyColoringAlgorithm(), +) @doc raw""" Include all the data for the constraints of [`MovingHorizonEstimator`](@ref). @@ -51,6 +56,7 @@ struct MovingHorizonEstimator{ JM<:JuMP.GenericModel, GB<:AbstractADType, JB<:AbstractADType, + HB<:Union{AbstractADType, Nothing}, CE<:KalmanEstimator, } <: StateEstimator{NT} model::SM @@ -61,6 +67,7 @@ struct MovingHorizonEstimator{ con::EstimatorConstraint{NT} gradient::GB jacobian::JB + hessian::HB oracle::Bool covestim::CE Z̃::Vector{NT} @@ -113,7 +120,7 @@ struct MovingHorizonEstimator{ function MovingHorizonEstimator{NT}( model::SM, He, i_ym, nint_u, nint_ym, cov::KC, Cwt, - optim::JM, gradient::GB, jacobian::JB, oracle, covestim::CE; + optim::JM, gradient::GB, jacobian::JB, hessian::HB, oracle, covestim::CE; direct=true ) where { NT<:Real, @@ -122,6 +129,7 @@ struct MovingHorizonEstimator{ JM<:JuMP.GenericModel, GB<:AbstractADType, JB<:AbstractADType, + HB<:Union{AbstractADType, Nothing}, CE<:KalmanEstimator{NT} } nu, ny, nd, nk = model.nu, model.ny, model.nd, model.nk @@ -158,11 +166,11 @@ struct MovingHorizonEstimator{ P̂arr_old = copy(cov.P̂_0) Nk = [0] corrected = [false] - estim = new{NT, SM, KC, JM, GB, JB, CE}( + estim = new{NT, SM, KC, JM, GB, JB, HB, CE}( model, cov, optim, con, - gradient, jacobian, oracle, + gradient, jacobian, hessian, oracle, covestim, Z̃, lastu0, x̂op, f̂op, x̂0, He, nε, @@ -268,8 +276,11 @@ transcription for now. function when `model` is not a [`LinModel`](@ref), see [`DifferentiationInterface` doc](@extref DifferentiationInterface List). - `jacobian=AutoForwardDiff()` : an `AbstractADType` backend for the Jacobian of the constraints when `model` is not a [`LinModel`](@ref), see `gradient` above for the options. -- `oracle=JuMP.solver_name(optim)=="Ipopt"`: use the efficient [`VectorNonlinearOracle`](@extref MathOptInterface MathOptInterface.VectorNonlinearOracle) - for the nonlinear constraints (not supported by most optimizers for now). +- `hessian=false` : an `AbstractADType` backend for the Hessian of the Lagrangian, see + `gradient` above for the options. The default `false` skip it and use the quasi-Newton + method of `optim`, which is always the case if `oracle=false` (see Extended Help). 
+- `oracle=JuMP.solver_name(optim)=="Ipopt"` : a `Bool` to use the [`VectorNonlinearOracle`](@extref MathOptInterface MathOptInterface.VectorNonlinearOracle) + for efficient nonlinear constraints (not supported by most optimizers for now). - `direct=true`: construct with a direct transmission from ``\mathbf{y^m}`` (a.k.a. current estimator, in opposition to the delayed/predictor form). @@ -280,10 +291,11 @@ julia> model = NonLinModel((x,u,_,_)->0.1x+u, (x,_,_)->2x, 10.0, 1, 1, 1, solver julia> estim = MovingHorizonEstimator(model, He=5, σR=[1], σP_0=[0.01]) MovingHorizonEstimator estimator with a sample time Ts = 10.0 s: ├ model: NonLinModel -├ optimizer: Ipopt +├ optimizer: Ipopt ├ gradient: AutoForwardDiff ├ jacobian: AutoForwardDiff -├ arrival covariance: UnscentedKalmanFilter +├ hessian: nothing +├ arrival covariance: UnscentedKalmanFilter └ dimensions: ├ 5 estimation steps He ├ 0 slack variable ε (estimation constraints) @@ -363,7 +375,16 @@ MovingHorizonEstimator estimator with a sample time Ts = 10.0 s: [`JuMP` documentation](@extref JuMP Common-mistakes-when-writing-a-user-defined-operator) for common mistakes when writing these functions. Also, an [`UnscentedKalmanFilter`](@ref) estimates the arrival covariance by default. - + + One exception about AD: the selected backend for the Hessian of the Lagrangian function + with `oracle=true` and `hessian=true` options is sparse: + ```julia + AutoSparse( + AutoForwardDiff(); + sparsity_detector = TracerSparsityDetector(), + coloring_algorithm = GreedyColoringAlgorithm() + ) + ``` The slack variable ``ε`` relaxes the constraints if enabled, see [`setconstraint!`](@ref). It is disabled by default for the MHE (from `Cwt=Inf`) but it should be activated for problems with two or more types of bounds, to ensure feasibility (e.g. on the estimated @@ -389,6 +410,7 @@ function MovingHorizonEstimator( optim::JM = default_optim_mhe(model), gradient::AbstractADType = DEFAULT_NONLINMHE_GRADIENT, jacobian::AbstractADType = DEFAULT_NONLINMHE_JACOBIAN, + hessian::Union{AbstractADType, Bool, Nothing} = false, oracle::Bool = JuMP.solver_name(optim)=="Ipopt", direct = true, σP_0 = sigmaP_0, @@ -406,7 +428,7 @@ function MovingHorizonEstimator( isnothing(He) && throw(ArgumentError("Estimation horizon He must be explicitly specified")) return MovingHorizonEstimator( model, He, i_ym, nint_u, nint_ym, P̂_0, Q̂, R̂, Cwt; - direct, optim, gradient, jacobian, oracle + direct, optim, gradient, jacobian, hessian, oracle ) end @@ -440,17 +462,19 @@ function MovingHorizonEstimator( optim::JM = default_optim_mhe(model), gradient::AbstractADType = DEFAULT_NONLINMHE_GRADIENT, jacobian::AbstractADType = DEFAULT_NONLINMHE_JACOBIAN, + hessian::Union{AbstractADType, Bool, Nothing} = false, oracle::Bool = JuMP.solver_name(optim)=="Ipopt", direct = true, covestim::CE = default_covestim_mhe(model, i_ym, nint_u, nint_ym, P̂_0, Q̂, R̂; direct) ) where {NT<:Real, SM<:SimModel{NT}, JM<:JuMP.GenericModel, CE<:StateEstimator{NT}} P̂_0, Q̂, R̂ = to_mat(P̂_0), to_mat(Q̂), to_mat(R̂) cov = KalmanCovariances(model, i_ym, nint_u, nint_ym, Q̂, R̂, P̂_0, He) + hessian = validate_hessian(hessian, gradient, oracle, DEFAULT_NONLINMHE_HESSIAN) validate_covestim(cov, covestim) return MovingHorizonEstimator{NT}( model, He, i_ym, nint_u, nint_ym, cov, Cwt, - optim, gradient, jacobian, oracle, covestim; + optim, gradient, jacobian, hessian, oracle, covestim; direct ) end @@ -735,7 +759,7 @@ Re-construct nonlinear constraints and add them to `estim.optim`. 
function reset_nonlincon!(estim::MovingHorizonEstimator, model::NonLinModel) optim = estim.optim if estim.oracle - g_oracle, = get_nonlinops(estim, optim) + g_oracle = get_nonlincon_oracle(estim, optim) set_nonlincon!(estim, model, optim, g_oracle) else set_nonlincon_leg!(estim, model, optim) @@ -1304,8 +1328,8 @@ function init_optimization!( end # constraints with vector nonlinear oracle, objective function with splatting: if estim.oracle - g_oracle, J_op = get_nonlinops(estim, optim) - optim[:J_op] = J_op + J_op = get_nonlinobj_op(estim, optim) + g_oracle = get_nonlincon_oracle(estim, optim) else J_func, ∇J_func!, g_funcs, ∇g_funcs! = get_optim_functions(estim, optim) @operator(optim, J_op, nZ̃, J_func, ∇J_func!) @@ -1321,72 +1345,193 @@ function init_optimization!( end """ - get_nonlinops(estim::MovingHorizonEstimator, optim) -> g_oracle, J_op + get_nonlinobj_op(estim::MovingHorizonEstimator, optim) -> J_op -Return the operators for the nonlinear optimization of [`MovingHorizonEstimator`](@ref). +Return the nonlinear operator for the objective of `estim` [`MovingHorizonEstimator`](@ref). -Return `g_oracle`, the [`VectorNonlinearOracle`](@extref MathOptInterface MathOptInterface.VectorNonlinearOracle) -for the ineqaulity constraints. Note that `g_oracle` only includes the non-`Inf` -inequality constraints, thus it must be re-constructed if they change. Also return `J_op`, -the [`NonlinearOperator`](@extref JuMP NonlinearOperator) for the objective function, based -on the splatting syntax. This method is really intricate and that's because of 2 elements: +It is based on the splatting syntax. This method is really intricate and that's because of: - These functions are used inside the nonlinear optimization, so they must be type-stable and as efficient as possible. All the function outputs and derivatives are cached and - updated in-place if required to use the efficient [`value_and_jacobian!`](@extref DifferentiationInterface DifferentiationInterface.value_and_jacobian!). + updated in-place if required to use the efficient [`value_and_gradient!`](@extref DifferentiationInterface DifferentiationInterface.value_and_jacobian!). - The splatting syntax for objective functions implies the use of `Vararg{T,N}` (see the [performance tip](@extref Julia Be-aware-of-when-Julia-avoids-specializing)) and memoization to avoid redundant computations. This is already complex, but it's even worse knowing that the automatic differentiation tools do not support splatting. 
""" -function get_nonlinops( +function get_nonlinobj_op( estim::MovingHorizonEstimator, optim::JuMP.GenericModel{JNT} ) where JNT<:Real - # ----------- common cache for Jfunc and gfuncs -------------------------------------- model, con = estim.model, estim.con - grad, jac = estim.gradient, estim.jacobian - nx̂, nym, nŷ, nu, nε, nk = estim.nx̂, estim.nym, model.ny, model.nu, estim.nε, model.nk + grad, hess = estim.gradient, estim.hessian + nx̂, nym, nŷ, nu, nk = estim.nx̂, estim.nym, model.ny, model.nu, model.nk + He = estim.He + ng = length(con.i_g) + nV̂, nX̂, ng, nZ̃ = He*nym, He*nx̂, length(con.i_g), length(estim.Z̃) + strict = Val(true) + myNaN = convert(JNT, NaN) + J::Vector{JNT} = zeros(JNT, 1) + V̂::Vector{JNT}, X̂0::Vector{JNT} = zeros(JNT, nV̂), zeros(JNT, nX̂) + k0::Vector{JNT} = zeros(JNT, nk) + û0::Vector{JNT}, ŷ0::Vector{JNT} = zeros(JNT, nu), zeros(JNT, nŷ) + g::Vector{JNT} = zeros(JNT, ng) + x̄::Vector{JNT} = zeros(JNT, nx̂) + function J!(Z̃, V̂, X̂0, û0, k0, ŷ0, g, x̄) + update_prediction!(V̂, X̂0, û0, k0, ŷ0, g, estim, Z̃) + return obj_nonlinprog!(x̄, estim, model, V̂, Z̃) + end + Z̃_J = fill(myNaN, nZ̃) # NaN to force update_predictions! at first call + J_cache = ( + Cache(V̂), Cache(X̂0), Cache(û0), Cache(k0), Cache(ŷ0), + Cache(g), + Cache(x̄), + ) + # temporarily "fill" the estimation window for the preparation of the gradient: + estim.Nk[] = He + ∇J_prep = prepare_gradient(J!, grad, Z̃_J, J_cache...; strict) + estim.Nk[] = 0 + ∇J = Vector{JNT}(undef, nZ̃) + if !isnothing(hess) + estim.Nk[] = He # see comment above + ∇²J_prep = prepare_hessian(J!, hess, Z̃_J, J_cache...; strict) + estim.Nk[] = 0 + ∇²J = init_diffmat(JNT, hess, ∇²J_prep, nZ̃, nZ̃) + end + update_objective! = if !isnothing(hess) + function (J, ∇J, ∇²J, Z̃_∇J, Z̃_arg) + if isdifferent(Z̃_arg, Z̃_∇J) + Z̃_∇J .= Z̃_arg + J[], _ = value_gradient_and_hessian!( + J!, ∇J, ∇²J, ∇²J_prep, hess, Z̃_J, J_cache... + ) + end + end + else + function (J, ∇J, Z̃_∇J, Z̃_arg) + if isdifferent(Z̃_arg, Z̃_∇J) + Z̃_∇J .= Z̃_arg + J[], _ = value_and_gradient!(J!, ∇J, ∇J_prep, grad, Z̃_J, J_cache...) + end + end + end + J_func = if !isnothing(hess) + function (Z̃_arg::Vararg{T, N}) where {N, T<:Real} + update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) + return J[]::T + end + else + function (Z̃_arg::Vararg{T, N}) where {N, T<:Real} + update_objective!(J, ∇J, Z̃_J, Z̃_arg) + return J[]::T + end + end + # only the multivariate syntax of JuMP.@operator, univariate is impossible for MHE + # since Z̃ comprises the arrival state estimate AND the estimated process noise: + ∇J_func! = if !isnothing(hess) + function (∇J_arg::AbstractVector{T}, Z̃_arg::Vararg{T, N}) where {N, T<:Real} + update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) + return ∇J_arg .= ∇J + end + else + function (∇J_arg::AbstractVector{T}, Z̃_arg::Vararg{T, N}) where {N, T<:Real} + update_objective!(J, ∇J, Z̃_J, Z̃_arg) + return ∇J_arg .= ∇J + end + end + function ∇²J_func!(∇²J_arg::AbstractMatrix{T}, Z̃_arg::Vararg{T, N}) where {N, T<:Real} + update_objective!(J, ∇J, ∇²J, Z̃_J, Z̃_arg) + return fill_lowertriangle!(∇²J_arg, ∇²J) + end + if !isnothing(hess) + @operator(optim, J_op, nZ̃, J_func, ∇J_func!, ∇²J_func!) + else + @operator(optim, J_op, nZ̃, J_func, ∇J_func!) + end + return J_op +end + +""" + get_nonlincon_oracle(estim::MovingHorizonEstimator, optim) -> g_oracle, geq_oracle + +Return the nonlinear constraint oracles for [`MovingHorizonEstimator`](@ref) `estim`. 
+ +Return `g_oracle`, the [`VectorNonlinearOracle`](@extref MathOptInterface MathOptInterface.VectorNonlinearOracle) +for the inequality constraints. Note that `g_oracle` only includes the non-`Inf` +inequality constraints, thus it must be re-constructed if they change. This method is really +intricate because the oracle is used inside the nonlinear optimization, so it must be +type-stable and as efficient as possible. All the function outputs and derivatives are +cached and updated in-place if required to use the efficient [`value_and_jacobian!`](@extref DifferentiationInterface DifferentiationInterface.value_and_jacobian!). +""" +function get_nonlincon_oracle( + estim::MovingHorizonEstimator, ::JuMP.GenericModel{JNT} +) where JNT<:Real + # ----------- common cache for all functions ---------------------------------------- + model, con = estim.model, estim.con + jac, hess = estim.jacobian, estim.hessian + nx̂, nym, nŷ, nu, nk = estim.nx̂, estim.nym, model.ny, model.nu, model.nk + He = estim.He i_g = findall(con.i_g) # convert to non-logical indices for non-allocating @views ng, ngi = length(con.i_g), sum(con.i_g) nV̂, nX̂, ng, nZ̃ = He*nym, He*nx̂, length(con.i_g), length(estim.Z̃) strict = Val(true) myNaN, myInf = convert(JNT, NaN), convert(JNT, Inf) - J::Vector{JNT} = zeros(JNT, 1) V̂::Vector{JNT}, X̂0::Vector{JNT} = zeros(JNT, nV̂), zeros(JNT, nX̂) k0::Vector{JNT} = zeros(JNT, nk) û0::Vector{JNT}, ŷ0::Vector{JNT} = zeros(JNT, nu), zeros(JNT, nŷ) g::Vector{JNT}, gi::Vector{JNT} = zeros(JNT, ng), zeros(JNT, ngi) - x̄::Vector{JNT} = zeros(JNT, nx̂) - # -------------- inequality constraint: nonlinear oracle ----------------------------- + λi::Vector{JNT} = ones(JNT, ngi) + # -------------- inequality constraint: nonlinear oracle ------------------------- function gi!(gi, Z̃, V̂, X̂0, û0, k0, ŷ0, g) update_prediction!(V̂, X̂0, û0, k0, ŷ0, g, estim, Z̃) gi .= @views g[i_g] return nothing end + function ℓ_gi(Z̃, λi, V̂, X̂0, û0, k0, ŷ0, g, gi) + update_prediction!(V̂, X̂0, û0, k0, ŷ0, g, estim, Z̃) + gi .= @views g[i_g] + return dot(λi, gi) + end Z̃_∇gi = fill(myNaN, nZ̃) # NaN to force update_predictions! at first call - ∇gi_context = ( + ∇gi_cache = ( Cache(V̂), Cache(X̂0), Cache(û0), Cache(k0), Cache(ŷ0), Cache(g) ) # temporarily "fill" the estimation window for the preparation of the gradient: estim.Nk[] = He - ∇gi_prep = prepare_jacobian(gi!, gi, jac, Z̃_∇gi, ∇gi_context...; strict) + ∇gi_prep = prepare_jacobian(gi!, gi, jac, Z̃_∇gi, ∇gi_cache...; strict) estim.Nk[] = 0 ∇gi = init_diffmat(JNT, jac, ∇gi_prep, nZ̃, ngi) ∇gi_structure = init_diffstructure(∇gi) + if !isnothing(hess) + ∇²gi_cache = ( + Cache(V̂), Cache(X̂0), Cache(û0), Cache(k0), Cache(ŷ0), Cache(g), Cache(gi) + ) + estim.Nk[] = He # see comment above + ∇²gi_prep = prepare_hessian( + ℓ_gi, hess, Z̃_∇gi, Constant(λi), ∇²gi_cache...; strict + ) + estim.Nk[] = 0 + ∇²ℓ_gi = init_diffmat(JNT, hess, ∇²gi_prep, nZ̃, nZ̃) + ∇²gi_structure = lowertriangle_indices(init_diffstructure(∇²ℓ_gi)) + end function update_con!(gi, ∇gi, Z̃_∇gi, Z̃_arg) if isdifferent(Z̃_arg, Z̃_∇gi) Z̃_∇gi .= Z̃_arg - value_and_jacobian!(gi!, gi, ∇gi, ∇gi_prep, jac, Z̃_∇gi, ∇gi_context...) + value_and_jacobian!(gi!, gi, ∇gi, ∇gi_prep, jac, Z̃_∇gi, ∇gi_cache...) 
end return nothing end - function gi_func!(gi_vec, Z̃_arg) + function gi_func!(gi_arg, Z̃_arg) update_con!(gi, ∇gi, Z̃_∇gi, Z̃_arg) - return gi_vec .= gi + return gi_arg .= gi end - function ∇gi_func!(∇gi_vec, Z̃_arg) + function ∇gi_func!(∇gi_arg, Z̃_arg) update_con!(gi, ∇gi, Z̃_∇gi, Z̃_arg) - return diffmat2vec!(∇gi_vec, ∇gi, ∇gi_structure) + return diffmat2vec!(∇gi_arg, ∇gi, ∇gi_structure) + end + function ∇²gi_func!(∇²ℓ_arg, Z̃_arg, λ_arg) + Z̃_∇gi .= Z̃_arg + λi .= λ_arg + hessian!(ℓ_gi, ∇²ℓ_gi, ∇²gi_prep, hess, Z̃_∇gi, Constant(λi), ∇²gi_cache...) + return diffmat2vec!(∇²ℓ_arg, ∇²ℓ_gi, ∇²gi_structure) end gi_min = fill(-myInf, ngi) gi_max = zeros(JNT, ngi) @@ -1396,42 +1541,11 @@ function get_nonlinops( u = gi_max, eval_f = gi_func!, jacobian_structure = ∇gi_structure, - eval_jacobian = ∇gi_func! + eval_jacobian = ∇gi_func!, + hessian_lagrangian_structure = isnothing(hess) ? Tuple{Int,Int}[] : ∇²gi_structure, + eval_hessian_lagrangian = isnothing(hess) ? nothing : ∇²gi_func! ) - # ------------- objective function: splatting syntax --------------------------------- - function J!(Z̃, V̂, X̂0, û0, k0, ŷ0, g, x̄) - update_prediction!(V̂, X̂0, û0, k0, ŷ0, g, estim, Z̃) - return obj_nonlinprog!(x̄, estim, model, V̂, Z̃) - end - Z̃_∇J = fill(myNaN, nZ̃) # NaN to force update_predictions! at first call - ∇J_context = ( - Cache(V̂), Cache(X̂0), Cache(û0), Cache(k0), Cache(ŷ0), - Cache(g), - Cache(x̄), - ) - # temporarily "fill" the estimation window for the preparation of the gradient: - estim.Nk[] = He - ∇J_prep = prepare_gradient(J!, grad, Z̃_∇J, ∇J_context...; strict) - estim.Nk[] = 0 - ∇J = Vector{JNT}(undef, nZ̃) - function update_objective!(J, ∇J, Z̃_∇J, Z̃_arg) - if isdifferent(Z̃_arg, Z̃_∇J) - Z̃_∇J .= Z̃_arg - J[], _ = value_and_gradient!(J!, ∇J, ∇J_prep, grad, Z̃_∇J, ∇J_context...) - end - end - function J_func(Z̃_arg::Vararg{T, N}) where {N, T<:Real} - update_objective!(J, ∇J, Z̃_∇J, Z̃_arg) - return J[]::T - end - function ∇J_func!(∇J_arg::AbstractVector{T}, Z̃_arg::Vararg{T, N}) where {N, T<:Real} - # only the multivariate syntax of JuMP.@operator, univariate is impossible for MHE - # since Z̃ comprises the arrival state estimate AND the estimated process noise - update_objective!(J, ∇J, Z̃_∇J, Z̃_arg) - return ∇J_arg .= ∇J - end - J_op = JuMP.add_nonlinear_operator(optim, nZ̃, J_func, ∇J_func!, name=:J_op) - g_oracle, J_op + return g_oracle end "By default, there is no nonlinear constraint, thus do nothing." diff --git a/src/general.jl b/src/general.jl index b608bb415..6845de7da 100644 --- a/src/general.jl +++ b/src/general.jl @@ -110,6 +110,30 @@ dense_backend(backend::AbstractADType) = backend dense_backend(backend::AutoSparse) = backend.dense_ad dense_backend(backend::SecondOrder) = backend.inner +"Validate `hessian` keyword argument and return the differentiation `backend`." +function validate_hessian(hessian, gradient, oracle, default) + if hessian == true + backend = default + elseif hessian == false || isnothing(hessian) + backend = nothing + else + backend = hessian + end + if oracle == false && !isnothing(backend) + error("Second order derivatives are only supported with oracle=true.") + end + if oracle == true && !isnothing(backend) + hess = dense_backend(backend) + grad = dense_backend(gradient) + if hess != grad + @info "The objective function gradient will be computed with the hessian "* + "backend ($(backend_str(hess)))\n instead of the one in gradient "* + "argument ($(backend_str(grad))) for efficiency." 
+ end + end + return backend +end + "Verify that x and y elements are different using `!==`." isdifferent(x, y) = any(xi !== yi for (xi, yi) in zip(x, y)) diff --git a/test/2_test_state_estim.jl b/test/2_test_state_estim.jl index 6dcb3bc0e..892887889 100644 --- a/test/2_test_state_estim.jl +++ b/test/2_test_state_estim.jl @@ -895,10 +895,14 @@ end @test isa(mhe13, MovingHorizonEstimator{Float32}) mhe14 = MovingHorizonEstimator( - nonlinmodel, He=5, gradient=AutoFiniteDiff(), jacobian=AutoFiniteDiff() + nonlinmodel, He=5, + gradient=AutoFiniteDiff(), + jacobian=AutoFiniteDiff(), + hessian=AutoFiniteDiff() ) @test mhe14.gradient == AutoFiniteDiff() @test mhe14.jacobian == AutoFiniteDiff() + @test mhe14.hessian == AutoFiniteDiff() @test_throws ArgumentError MovingHorizonEstimator(linmodel) @test_throws ArgumentError MovingHorizonEstimator(linmodel, He=0) @@ -952,30 +956,39 @@ end preparestate!(mhe1, [51, 32], [5]) @test mhe1([5]) ≈ [51, 32] atol=1e-3 - mhe1 = MovingHorizonEstimator(nonlinmodel, He=2, nint_u=[1, 1], nint_ym=[0, 0], direct=false) - JuMP.set_attribute(mhe1.optim, "tol", 1e-7) - preparestate!(mhe1, [50, 30], [5]) - x̂ = updatestate!(mhe1, [10, 50], [50, 30], [5]) + mhe1b = MovingHorizonEstimator(nonlinmodel, He=2, nint_u=[1, 1], nint_ym=0, direct=false) + JuMP.set_attribute(mhe1b.optim, "tol", 1e-7) + preparestate!(mhe1b, [50, 30], [5]) + x̂ = updatestate!(mhe1b, [10, 50], [50, 30], [5]) @test x̂ ≈ zeros(6) atol=1e-9 - @test mhe1.x̂0 ≈ zeros(6) atol=1e-9 - info = getinfo(mhe1) + @test mhe1b.x̂0 ≈ zeros(6) atol=1e-9 + info = getinfo(mhe1b) @test info[:x̂] ≈ x̂ atol=1e-9 @test info[:Ŷ][end-1:end] ≈ [50, 30] atol=1e-9 - @test initstate!(mhe1, [11, 52], [50, 30], [5]) ≈ zeros(6) atol=1e-9 - @test mhe1.lastu0 ≈ [1, 2] - setstate!(mhe1, [1,2,3,4,5,6]) - @test mhe1.x̂0 ≈ [1,2,3,4,5,6] + mhe1c = MovingHorizonEstimator(nonlinmodel, He=2, direct=false, hessian=true) + preparestate!(mhe1c, [50, 30], [5]) + x̂ = updatestate!(mhe1c, [10, 50], [50, 30], [5]) + @test x̂ ≈ zeros(6) atol=1e-9 + @test mhe1c.x̂0 ≈ zeros(6) atol=1e-9 + info = getinfo(mhe1c) + @test info[:x̂] ≈ x̂ atol=1e-9 + @test info[:Ŷ][end-1:end] ≈ [50, 30] atol=1e-9 + + @test initstate!(mhe1c, [11, 52], [50, 30], [5]) ≈ zeros(6) atol=1e-9 + @test mhe1c.lastu0 ≈ [1, 2] + setstate!(mhe1c, [1,2,3,4,5,6]) + @test mhe1c.x̂0 ≈ [1,2,3,4,5,6] for i in 1:40 - preparestate!(mhe1, [50, 30], [5]) - updatestate!(mhe1, [11, 52], [50, 30], [5]) + preparestate!(mhe1c, [50, 30], [5]) + updatestate!(mhe1c, [11, 52], [50, 30], [5]) end - @test mhe1([5]) ≈ [50, 30] atol=1e-3 + @test mhe1c([5]) ≈ [50, 30] atol=1e-3 for i in 1:40 - preparestate!(mhe1, [51, 32], [5]) - updatestate!(mhe1, [10, 50], [51, 32], [5]) + preparestate!(mhe1c, [51, 32], [5]) + updatestate!(mhe1c, [10, 50], [51, 32], [5]) end - @test mhe1([5]) ≈ [51, 32] atol=1e-3 + @test mhe1c([5]) ≈ [51, 32] atol=1e-3 mhe2 = MovingHorizonEstimator(linmodel, He=2) preparestate!(mhe2, [50, 30], [5])