diff --git a/Project.toml b/Project.toml
index 9ecff6938..2f0aa07d4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -59,7 +59,7 @@ Random = "1.10"
 RecursiveArrayTools = "3.31.2"
 ReTestItems = "1.29"
 Reexport = "1.2"
-SciMLBase = "2.108.0"
+SciMLBase = "2.120.0"
 Sparspak = "0.3.11"
 StaticArrays = "1.9.8"
 Test = "1.10"
diff --git a/docs/src/tutorials/optimal_control.md b/docs/src/tutorials/optimal_control.md
new file mode 100644
index 000000000..f196fec52
--- /dev/null
+++ b/docs/src/tutorials/optimal_control.md
@@ -0,0 +1,101 @@
+# Solve Optimal Control problem
+
+A classical optimal control problem is the rocket launching problem(aka [Goddard Rocket problem](https://en.wikipedia.org/wiki/Goddard_problem)). Say we have a rocket with limited fuel and is launched vertically. And we want to control the final altitude of this rocket so that we can make the best of the limited fuel in rocket to get to the highest altitude. In this optimal control problem, the state variables are:
+
+  - Velocity of the rocket: $x_v(t)$
+  - Altitude of the rocket: $x_h(t)$
+  - Mass of the rocket and the fuel: $x_m(t)$
+
+The control variable is
+
+  - Thrust of the rocket: $u_t(t)$
+
+The dynamics of the launching can be formulated with three differential equations:
+
+$$
+\left\{\begin{aligned}
+&\frac{dx_v}{dt}=\frac{u_t-drag(x_h,x_v)}{x_m}-g(x_h)\\
+&\frac{dx_h}{dt}=x_v\\
+&\frac{dx_m}{dt}=-\frac{u_t}{c}
+\end{aligned}\right.
+$$
+
+where the drag $D(x_h,x_v)$ is a function of altitude and velocity:
+
+$$
+D(x_h,x_v)=D_c\cdot x_v^2\cdot\exp^{h_c(\frac{x_h-x_h(0)}{x_h(0)})}
+$$
+
+gravity $g(x_h)$ is a function of altitude:
+
+$$
+g(x_h)=g_0\cdot (\frac{x_h(0)}{x_h})^2
+$$
+
+$c$ is a constant. Suppose the final time is $T$, we here want to maximize the final altitude $x_h(T)$:
+
+$$
+\max x_h(T)
+$$
+
+The inequality constraints for the state variables and control variables are:
+
+$$
+\left\{\begin{aligned}
+&x_v>0\\
+&x_h>0\\
+&m_T<x_m<m_0\\
+&0<u_t<u_{t\text{max}}
+\end{aligned}\right.
+$$
+
+Similar solving for such optimal control problem can be found on JuMP.jl and InfiniteOpt.jl. The detailed parameters are taken from [COPS](https://www.mcs.anl.gov/%7Emore/cops/cops3.pdf).
+
+```julia
+using BoundaryValueDiffEqMIRK, OptimizationMOI, Ipopt
+h_0 = 1                      # Initial height
+v_0 = 0                      # Initial velocity
+m_0 = 1.0                    # Initial mass
+m_T = 0.6                    # Final mass
+g_0 = 1                      # Gravity at the surface
+h_c = 500                    # Used for drag
+c = 0.5 * sqrt(g_0 * h_0)    # Thrust-to-fuel mass
+D_c = 0.5 * 620 * m_0 / g_0  # Drag scaling
+u_t_max = 3.5 * g_0 * m_0    # Maximum thrust
+T_max = 0.2                  # Number of seconds
+T = 1_000                    # Number of time steps
+Δt = 0.2 / T;                # Time per discretized step
+
+tspan = (0.0, 0.2)
+drag(x_h, x_v) = D_c * x_v^2 * exp(-h_c * (x_h - h_0) / h_0)
+g(x_h) = g_0 * (h_0 / x_h)^2
+function rocket_launch!(du, u, p, t)
+    # u_t is the control variable (thrust)
+    x_v, x_h, x_m, u_t = u[1], u[2], u[3], u[4]
+    du[1] = (u_t-drag(x_h, x_v))/x_m - g(x_h)
+    du[2] = x_v
+    du[3] = -u_t/c
+end
+function rocket_launch_bc!(res, u, p, t)
+    res[1] = u(0.0)[1] - v_0
+    res[2] = u(0.0)[2] - h_0
+    res[3] = u(0.2)[3] - m_T
+    res[4] = u(0.2)[4] - 0.0
+end
+function constraints!(res, u, p)
+    res[1] = u[1]
+    res[2] = u[2]
+    res[3] = u[3]
+    res[4] = u[4]
+end
+cost_fun(u, p) = -u[end - 2] #Final altitude x_h. To minimize, only temporary, need to use temporary solution interpolation here similar to what we do in boundary condition evaluations.
+#cost_fun(sol, p) = -sol(0.2)[2]
+u0 = [v_0, h_0, m_0, 0.0]
+rocket_launch_fun = BVPFunction(rocket_launch!, rocket_launch_bc!; cost = cost_fun,
+    inequality = constraints!, f_prototype = zeros(3))
+rocket_launch_prob = BVProblem(rocket_launch_fun, u0, tspan; lcons = [0.0, h_0, m_T, 0.0],
+    ucons = [Inf, Inf, m_0, u_t_max])
+sol = solve(rocket_launch_prob, MIRK4(; optimize = Ipopt.Optimizer()); dt = 0.002)
+```
+
+Similar optimal control problem solving can also be deployed in JuMP.jl and InfiniteOpt.jl.
diff --git a/lib/BoundaryValueDiffEqCore/src/utils.jl b/lib/BoundaryValueDiffEqCore/src/utils.jl
index a3096f8c0..93fa82df1 100644
--- a/lib/BoundaryValueDiffEqCore/src/utils.jl
+++ b/lib/BoundaryValueDiffEqCore/src/utils.jl
@@ -629,16 +629,28 @@ end
 
 @inline function __extract_lcons_ucons(prob::AbstractBVProblem, ::Type{T}, M, N) where {T}
     lcons = if isnothing(prob.lcons)
-        zeros(T, N*M)
+        zeros(T, N*M) #TODO: handle carefully when NLLS
     else
-        lcons_length = length(prob.lcons)
-        vcat(prob.lcons, zeros(T, N*M - lcons_length))
+        length_f_prototype = length(prob.f.f_prototype)
+        if !(isnothing(prob.f.equality) && isnothing(prob.f.inequality))
+            # When there are additional equality or inequality constraints
+            vcat(repeat(prob.lcons, N), zeros(T, M + (N - 1)*length_f_prototype))
+        else
+            lcons_length = length(prob.lcons)
+            vcat(prob.lcons, zeros(T, N*M - lcons_length))
+        end
     end
     ucons = if isnothing(prob.ucons)
-        zeros(T, N*M)
+        zeros(T, N*M) #TODO: handle carefully when NLLS
     else
-        ucons_length = length(prob.ucons)
-        vcat(prob.ucons, zeros(T, N*M - ucons_length))
+        length_f_prototype = length(prob.f.f_prototype)
+        if !(isnothing(prob.f.equality) && isnothing(prob.f.inequality))
+            # When there are additional equality or inequality constraints
+            vcat(repeat(prob.ucons, N), zeros(T, M + (N - 1)*length_f_prototype))
+        else
+            ucons_length = length(prob.ucons)
+            vcat(prob.ucons, zeros(T, N*M - ucons_length))
+        end
     end
     return lcons, ucons
 end
diff --git a/lib/BoundaryValueDiffEqFIRK/src/algorithms.jl b/lib/BoundaryValueDiffEqFIRK/src/algorithms.jl
index 00f8e5e23..25e43aa19 100644
--- a/lib/BoundaryValueDiffEqFIRK/src/algorithms.jl
+++ b/lib/BoundaryValueDiffEqFIRK/src/algorithms.jl
@@ -121,7 +121,6 @@ for stage in (2, 3, 4, 5)
           - `optimize`: Internal Optimization solver. Any solver which conforms to the SciML
             `OptimizationProblem` interface can be used. Note that any autodiff argument for
             the solver will be ignored and a custom jacobian algorithm will be used.
-
           - `jac_alg`: Jacobian Algorithm used for the nonlinear solver. Defaults to
             `BVPJacobianAlgorithm()`, which automatically decides the best algorithm to
             use based on the input types and problem type.
@@ -132,7 +131,6 @@ for stage in (2, 3, 4, 5)
                 `nonbc_diffmode` defaults to `AutoSparse(AutoForwardDiff)` if possible else
                 `AutoSparse(AutoFiniteDiff)`. For `bc_diffmode`, defaults to `AutoForwardDiff` if
                 possible else `AutoFiniteDiff`.
-
           - `nested_nlsolve`: Whether or not to use a nested nonlinear solve for the
             implicit FIRK step. Defaults to `false`. If set to `false`, the FIRK stages are
             solved as a part of the global residual. The general recommendation is to choose
@@ -150,6 +148,7 @@ for stage in (2, 3, 4, 5)
         ## References
 
         Reference for Lobatto and Radau methods:
+
         ```bibtex
         @Inbook{Jay2015,
             author="Jay, Laurent O.",
@@ -168,7 +167,9 @@ for stage in (2, 3, 4, 5)
             year = {2015},
         }
         ```
+
         References for implementation of defect control, based on the `bvp5c` solver in MATLAB:
+
         ```bibtex
         @article{shampine_solving_nodate,
             title = {Solving {Boundary} {Value} {Problems} for {Ordinary} {Diﬀerential} {Equations} in {Matlab} with bvp4c
diff --git a/lib/BoundaryValueDiffEqMIRK/src/collocation.jl b/lib/BoundaryValueDiffEqMIRK/src/collocation.jl
index cb971737b..88d79bd02 100644
--- a/lib/BoundaryValueDiffEqMIRK/src/collocation.jl
+++ b/lib/BoundaryValueDiffEqMIRK/src/collocation.jl
@@ -1,10 +1,43 @@
-function Φ!(residual, cache::MIRKCache, y, u, trait)
-    return Φ!(residual, cache.fᵢ_cache, cache.k_discrete, cache.f, cache.TU,
-        y, u, cache.p, cache.mesh, cache.mesh_dt, cache.stage, trait)
+function Φ!(residual, cache::MIRKCache, y, u, trait, constraint)
+    return Φ!(residual, cache.fᵢ_cache, cache.k_discrete, cache.f, cache.TU, y, u,
+        cache.p, cache.mesh, cache.mesh_dt, cache.stage, trait, constraint)
 end
 
-@views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::MIRKTableau,
-        y, u, p, mesh, mesh_dt, stage::Int, ::DiffCacheNeeded)
+@views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::MIRKTableau, y, u, p, mesh,
+        mesh_dt, stage::Int, ::DiffCacheNeeded, constraint::Val{true})
+    (; c, v, x, b) = TU
+
+    ttt = get_tmp(fᵢ_cache, u)
+    tmp = copy(ttt[1:3])
+
+    length_control = length(last(residual))
+
+    T = eltype(u)
+    for i in eachindex(k_discrete)
+        K = get_tmp(k_discrete[i], u)
+        residᵢ = residual[i]
+        h = mesh_dt[i]
+
+        yᵢ = get_tmp(y[i], u)
+        yᵢ₊₁ = get_tmp(y[i + 1], u)
+
+        yᵢ, uᵢ = yᵢ[1:(end - length_control)], yᵢ[(end - length_control + 1):end]
+        yᵢ₊₁, uᵢ₊₁ = yᵢ₊₁[1:(end - length_control)], yᵢ₊₁[(end - length_control + 1):end]
+
+        for r in 1:stage
+            @. tmp = (1 - v[r]) * yᵢ + v[r] * yᵢ₊₁
+            __maybe_matmul!(tmp, K[:, 1:(r - 1)], x[r, 1:(r - 1)], h, T(1))
+            f!(K[:, r], vcat(tmp, uᵢ), p, mesh[i] + c[r] * h)
+        end
+
+        # Update residual
+        @. residᵢ = yᵢ₊₁ - yᵢ
+        __maybe_matmul!(residᵢ, K[:, 1:stage], b[1:stage], -h, T(1))
+    end
+end
+
+@views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::MIRKTableau, y, u, p, mesh,
+        mesh_dt, stage::Int, ::DiffCacheNeeded, constraint::Val{false})
     (; c, v, x, b) = TU
 
     tmp = get_tmp(fᵢ_cache, u)
@@ -29,8 +62,8 @@ end
     end
 end
 
-@views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::MIRKTableau, y,
-        u, p, mesh, mesh_dt, stage::Int, ::NoDiffCacheNeeded)
+@views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::MIRKTableau, y, u, p, mesh,
+        mesh_dt, stage::Int, ::NoDiffCacheNeeded, constraint::Val{false})
     (; c, v, x, b) = TU
 
     tmp = similar(fᵢ_cache)
diff --git a/lib/BoundaryValueDiffEqMIRK/src/mirk.jl b/lib/BoundaryValueDiffEqMIRK/src/mirk.jl
index 98d89646c..935f8e7e8 100644
--- a/lib/BoundaryValueDiffEqMIRK/src/mirk.jl
+++ b/lib/BoundaryValueDiffEqMIRK/src/mirk.jl
@@ -12,6 +12,7 @@
     alg                        # MIRK methods
     TU                         # MIRK Tableau
     ITU                        # MIRK Interpolation Tableau
+    f_prototype
     bcresid_prototype
     # Everything below gets resized in adaptive methods
     mesh                       # Discrete mesh
@@ -43,6 +44,7 @@ function SciMLBase.__init(
     diffcache = __cache_trait(alg.jac_alg)
     @assert (iip || isnothing(alg.optimize)) "Out-of-place constraints don't allow optimization solvers "
     fit_parameters = haskey(prob.kwargs, :fit_parameters)
+    constraint = (!isnothing(prob.f.inequality)) || (!isnothing(prob.f.equality))
 
     t₀, t₁ = prob.tspan
     ig, T,
@@ -64,18 +66,42 @@ function SciMLBase.__init(
     y = __alloc.(copy.(y₀.u))
     TU, ITU = constructMIRK(alg, T)
     stage = alg_stage(alg)
+    f_prototype = isnothing(prob.f.f_prototype) ? nothing : __vec(prob.f.f_prototype)
 
-    k_discrete = [__maybe_allocate_diffcache(safe_similar(X, N, stage), chunksize, alg.jac_alg)
-                  for _ in 1:Nig]
-    k_interp = VectorOfArray([similar(X, N, ITU.s_star - stage) for _ in 1:Nig])
+    k_discrete = if !constraint
+        [__maybe_allocate_diffcache(safe_similar(X, N, stage), chunksize, alg.jac_alg)
+         for _ in 1:Nig]
+    else
+        [__maybe_allocate_diffcache(safe_similar(X, N - length(f_prototype), stage), chunksize, alg.jac_alg)
+         for _ in 1:Nig]
+    end
+    k_interp = if !constraint
+        VectorOfArray([similar(X, N, ITU.s_star - stage) for _ in 1:Nig])
+    else
+        VectorOfArray([similar(X, N - length(f_prototype), ITU.s_star - stage)
+                       for _ in 1:Nig])
+    end
 
     bcresid_prototype, resid₁_size = __get_bcresid_prototype(prob.problem_type, prob, X)
 
+    # residual should be [bc_resid_prototype; f_prototype; f_prototype; f_prototype; f_prototype...]
     residual = if iip
-        if prob.problem_type isa TwoPointBVProblem
-            vcat([__alloc(__vec(bcresid_prototype))], __alloc.(copy.(@view(y₀.u[2:end]))))
+        if !constraint
+            if prob.problem_type isa TwoPointBVProblem
+                vcat([__alloc(__vec(bcresid_prototype))], __alloc.(copy.(@view(y₀.u[2:end]))))
+            else
+                vcat([__alloc(bcresid_prototype)], __alloc.(copy.(@view(y₀.u[2:end]))))
+            end
         else
-            vcat([__alloc(bcresid_prototype)], __alloc.(copy.(@view(y₀.u[2:end]))))
+            if prob.problem_type isa TwoPointBVProblem
+                #vcat(__alloc.(copy.(y₀.u)), [__alloc(__vec(bcresid_prototype))],
+                #    __alloc.(copy.(@view(y₀.u[2:end]))))
+            else
+                #vcat(__alloc.(copy.(y₀.u)), [__alloc(bcresid_prototype)],
+                #    __alloc.(copy.(@view(y₀.u[2:end]))))
+                vcat(__alloc.(copy.(y₀.u)), [__alloc(bcresid_prototype)],
+                    __alloc.(copy.([f_prototype for _ in 1:Nig])))
+            end
         end
     else
         nothing
@@ -90,18 +116,23 @@ function SciMLBase.__init(
     bcresid_prototype = __vec(bcresid_prototype)
     f,
     bc = if X isa AbstractVector
-        #TODO: Simplify the logic by wrapping the functions
-        if fit_parameters == true
+        f_wrapped = prob.f
+        bc_wrapped = prob.f.bc
+        if fit_parameters
             l_parameters = length(prob.p)
-            vecf! = function (du, u, p, t)
-                prob.f(du, u, @view(u[(end - l_parameters + 1):end]), t)
-                du[(end - l_parameters + 1):end] .= 0
+            base_f = f_wrapped
+            f_wrapped = @closure (du,
+                u,
+                p,
+                t) -> begin
+                @inbounds @views begin
+                    base_f(du, u, u[(end - l_parameters + 1):end], t)
+                    fill!(du[(end - l_parameters + 1):end], zero(eltype(du)))
+                end
+                return nothing
             end
-            vecbc! = prob.f.bc
-            vecf!, vecbc!
-        else
-            prob.f, prob.f.bc
         end
+        f_wrapped, bc_wrapped
     elseif iip
         vecf! = @closure (du, u, p, t) -> __vec_f!(du, u, p, t, prob.f, size(X))
         vecbc! = if !(prob.problem_type isa TwoPointBVProblem)
@@ -128,9 +159,9 @@ function SciMLBase.__init(
     prob_ = !(prob.u0 isa AbstractArray) ? remake(prob; u0 = X) : prob
 
     return MIRKCache{iip, T, use_both, typeof(diffcache), fit_parameters}(
-        alg_order(alg), stage, N, size(X), f, bc, prob_, prob.problem_type, prob.p,
-        alg, TU, ITU, bcresid_prototype, mesh, mesh_dt, k_discrete, k_interp, y, y₀,
-        residual, fᵢ_cache, fᵢ₂_cache, errors, new_stages, resid₁_size, nlsolve_kwargs,
+        alg_order(alg), stage, N, size(X), f, bc, prob_, prob.problem_type, prob.p, alg,
+        TU, ITU, f_prototype, bcresid_prototype, mesh, mesh_dt, k_discrete, k_interp, y,
+        y₀, residual, fᵢ_cache, fᵢ₂_cache, errors, new_stages, resid₁_size, nlsolve_kwargs,
         optimize_kwargs, (; abstol, dt, adaptive, controller, fit_parameters, kwargs...))
 end
 
@@ -239,6 +270,13 @@ end
 
 # Constructing the Nonlinear Problem
 function __construct_problem(cache::MIRKCache{iip}, y::AbstractVector, y₀::AbstractVectorOfArray) where {iip}
+    constraint = (!isnothing(cache.prob.f.inequality)) ||
+                 (!isnothing(cache.prob.f.equality))
+    return __construct_problem(cache, y, y₀, Val(constraint))
+end
+
+function __construct_problem(cache::MIRKCache{iip}, y::AbstractVector,
+        y₀::AbstractVectorOfArray, constraint::Val{false}) where {iip}
     pt = cache.problem_type
     (; jac_alg) = cache.alg
 
@@ -259,7 +297,7 @@ function __construct_problem(cache::MIRKCache{iip}, y::AbstractVector, y₀::Abs
         @closure (du,
             u,
             p) -> __mirk_loss_collocation!(
-            du, u, p, cache.y, cache.mesh, cache.residual, cache, trait)
+            du, u, p, cache.y, cache.mesh, cache.residual, cache, trait, constraint)
     else
         @closure (u,
             p) -> __mirk_loss_collocation(
@@ -270,21 +308,78 @@ function __construct_problem(cache::MIRKCache{iip}, y::AbstractVector, y₀::Abs
         @closure (du,
             u,
             p) -> __mirk_loss!(du, u, p, cache.y, pt, cache.bc, cache.residual,
-            cache.mesh, cache, eval_sol, trait)
+            cache.mesh, cache, eval_sol, trait, constraint)
     else
         @closure (u,
             p) -> __mirk_loss(
             u, p, cache.y, pt, cache.bc, cache.mesh, cache, eval_sol, trait)
     end
 
-    return __construct_problem(cache, y, loss_bc, loss_collocation, loss, pt)
+    return __construct_problem(cache, y, loss_bc, loss_collocation, loss, pt, constraint)
 end
 
-@views function __mirk_loss!(resid, u, p, y, pt::StandardBVProblem, bc!::BC, residual,
-        mesh, cache, EvalSol, trait::DiffCacheNeeded) where {BC}
+function __construct_problem(cache::MIRKCache{iip}, y::AbstractVector,
+        y₀::AbstractVectorOfArray, constraint::Val{true}) where {iip}
+    pt = cache.problem_type
+    (; jac_alg) = cache.alg
+
+    eval_sol = EvalSol(__restructure_sol(y₀.u, cache.in_size), cache.mesh, cache)
+
+    trait = __cache_trait(jac_alg)
+
+    loss_bc = if iip
+        @closure (du,
+            u,
+            p) -> __mirk_loss_bc!(du, u, p, pt, cache.bc, cache.y, cache.mesh, cache, trait)
+    else
+        @closure (
+            u, p) -> __mirk_loss_bc(u, p, pt, cache.bc, cache.y, cache.mesh, cache, trait)
+    end
+
+    loss_collocation = if iip
+        @closure (du,
+            u,
+            p) -> __mirk_loss_collocation!(
+            du, u, p, cache.y, cache.mesh, cache.residual, cache, trait, constraint)
+    else
+        @closure (u,
+            p) -> __mirk_loss_collocation(
+            u, p, cache.y, cache.mesh, cache.residual, cache, trait)
+    end
+
+    loss_constraint = @closure (du,
+        u,
+        p) -> __mirk_loss_constraint!(
+        du, u, p, cache.y, cache.mesh, cache.residual, cache, trait)
+
+    loss = if iip
+        @closure (du,
+            u,
+            p) -> __mirk_loss!(du, u, p, cache.y, pt, cache.bc, cache.residual,
+            cache.mesh, cache, eval_sol, trait, constraint)
+    else
+        @closure (u,
+            p) -> __mirk_loss(
+            u, p, cache.y, pt, cache.bc, cache.mesh, cache, eval_sol, trait)
+    end
+
+    return __construct_problem(
+        cache, y, loss_bc, loss_collocation, loss_constraint, loss, pt, constraint)
+end
+
+# Let's only do inplace version now since Optimization.jl only supports that.
+
+# J = [J_constraints;
+#      J_bvp]
+# where J_constraints is the Jacobian from equality/inequality constraints(whole-time equality/inequality)
+# and J_bvp is the Jacobian computed from bc and collocation
+
+@views function __mirk_loss!(
+        resid, u, p, y, pt::StandardBVProblem, bc!::BC, residual, mesh, cache,
+        EvalSol, trait::DiffCacheNeeded, constraint::Val{false}) where {BC}
     y_ = recursive_unflatten!(y, u)
     resids = [get_tmp(r, u) for r in residual]
-    Φ!(resids[2:end], cache, y_, u, trait)
+    Φ!(resids[2:end], cache, y_, u, trait, constraint)
     EvalSol.u[1:end] .= __restructure_sol(y_, cache.in_size)
     EvalSol.cache.k_discrete[1:end] .= cache.k_discrete
     eval_bc_residual!(resids[1], pt, bc!, EvalSol, p, mesh)
@@ -292,10 +387,11 @@ end
     return nothing
 end
 
-@views function __mirk_loss!(resid, u, p, y, pt::StandardBVProblem, bc!::BC, residual,
-        mesh, cache, EvalSol, trait::NoDiffCacheNeeded) where {BC}
+@views function __mirk_loss!(
+        resid, u, p, y, pt::StandardBVProblem, bc!::BC, residual, mesh, cache,
+        EvalSol, trait::NoDiffCacheNeeded, constraint::Val{false}) where {BC}
     y_ = recursive_unflatten!(y, u)
-    Φ!(residual[2:end], cache, y_, u, trait)
+    Φ!(residual[2:end], cache, y_, u, trait, constraint)
     EvalSol.u[1:end] .= __restructure_sol(y_, cache.in_size)
     EvalSol.cache.k_discrete[1:end] .= cache.k_discrete
     eval_bc_residual!(residual[1], pt, bc!, EvalSol, p, mesh)
@@ -303,11 +399,12 @@ end
     return nothing
 end
 
-@views function __mirk_loss!(resid, u, p, y, pt::TwoPointBVProblem, bc!::Tuple{BC1, BC2},
-        residual, mesh, cache, _, trait::DiffCacheNeeded) where {BC1, BC2}
+@views function __mirk_loss!(
+        resid, u, p, y, pt::TwoPointBVProblem, bc!::Tuple{BC1, BC2}, residual, mesh,
+        cache, _, trait::DiffCacheNeeded, constraint::Val{false}) where {BC1, BC2}
     y_ = recursive_unflatten!(y, u)
     resids = [get_tmp(r, u) for r in residual]
-    Φ!(resids[2:end], cache, y_, u, trait)
+    Φ!(resids[2:end], cache, y_, u, trait, constraint)
     resida = resids[1][1:prod(cache.resid_size[1])]
     residb = resids[1][(prod(cache.resid_size[1]) + 1):end]
     eval_bc_residual!((resida, residb), pt, bc!, y_, p, mesh)
@@ -315,10 +412,11 @@ end
     return nothing
 end
 
-@views function __mirk_loss!(resid, u, p, y, pt::TwoPointBVProblem, bc!::Tuple{BC1, BC2},
-        residual, mesh, cache, _, trait::NoDiffCacheNeeded) where {BC1, BC2}
+@views function __mirk_loss!(
+        resid, u, p, y, pt::TwoPointBVProblem, bc!::Tuple{BC1, BC2}, residual, mesh,
+        cache, _, trait::NoDiffCacheNeeded, constraint::Val{false}) where {BC1, BC2}
     y_ = recursive_unflatten!(y, u)
-    Φ!(residual[2:end], cache, y_, u, trait)
+    Φ!(residual[2:end], cache, y_, u, trait, constraint)
     resida = residual[1][1:prod(cache.resid_size[1])]
     residb = residual[1][(prod(cache.resid_size[1]) + 1):end]
     eval_bc_residual!((resida, residb), pt, bc!, y_, p, mesh)
@@ -326,6 +424,23 @@ end
     return nothing
 end
 
+@views function __mirk_loss!(
+        resid, u, p, y, pt::StandardBVProblem, bc!::BC, residual, mesh, cache,
+        EvalSol, trait::DiffCacheNeeded, constraint::Val{true}) where {BC}
+    y_ = recursive_unflatten!(y, u)
+    L = length(y_)
+    resids = [get_tmp(r, u) for r in residual]
+    # whole-time inequality constraints
+    cache.prob.f.inequality.(resids[1:L], y_, nothing)
+    Φ!(resids[(L + 2):2L], cache, y_, u, trait, constraint)
+    EvalSol.u[1:end] .= __restructure_sol(y_, cache.in_size)
+    EvalSol.cache.k_discrete[1:end] .= cache.k_discrete
+    eval_bc_residual!(resids[L + 1], pt, bc!, EvalSol, p, mesh)
+
+    recursive_flatten!(resid, resids)
+    return nothing
+end
+
 @views function __mirk_loss(
         u, p, y, pt::StandardBVProblem, bc::BC, mesh, cache, EvalSol, trait) where {BC}
     y_ = recursive_unflatten!(y, u)
@@ -344,6 +459,10 @@ end
     return vcat(resid_bca, mapreduce(vec, vcat, resid_co), resid_bcb)
 end
 
+################################################################################
+# Dispatch for boundary condition part
+################################################################################
+
 @views function __mirk_loss_bc!(
         resid, u, p, pt, bc!::BC, y, mesh, cache::MIRKCache, trait) where {BC}
     y_ = recursive_unflatten!(y, u)
@@ -359,20 +478,46 @@ end
     return eval_bc_residual(pt, bc!, soly_, p, mesh)
 end
 
-@views function __mirk_loss_collocation!(
-        resid, u, p, y, mesh, residual, cache, trait::DiffCacheNeeded)
+################################################################################
+# Dispatch for collocation part
+################################################################################
+
+@views function __mirk_loss_collocation!(resid, u, p, y, mesh, residual, cache,
+        trait::DiffCacheNeeded, constraint::Val{false})
+    y_ = recursive_unflatten!(y, u)
+    N = length(y)
+    resids = [get_tmp(r, u) for r in residual[2:N]]
+    Φ!(resids, cache, y_, u, trait, constraint)
+    recursive_flatten!(resid, resids)
+    return nothing
+end
+
+@views function __mirk_loss_collocation!(resid, u, p, y, mesh, residual, cache,
+        trait::DiffCacheNeeded, constraint::Val{true})
     y_ = recursive_unflatten!(y, u)
-    resids = [get_tmp(r, u) for r in residual[2:end]]
-    Φ!(resids, cache, y_, u, trait)
+    N = length(y)
+    resids = [get_tmp(r, u) for r in residual[(N + 2):end]]
+    Φ!(resids, cache, y_, u, trait, constraint)
     recursive_flatten!(resid, resids)
     return nothing
 end
 
-@views function __mirk_loss_collocation!(
-        resid, u, p, y, mesh, residual, cache, trait::NoDiffCacheNeeded)
+@views function __mirk_loss_collocation!(resid, u, p, y, mesh, residual, cache,
+        trait::NoDiffCacheNeeded, constraint::Val{false})
     y_ = recursive_unflatten!(y, u)
-    resids = [r for r in residual[2:end]]
-    Φ!(resids, cache, y_, u, trait)
+    N = length(y)
+    resids = [r for r in residual[2:N]]
+    Φ!(resids, cache, y_, u, trait, constraint)
+    recursive_flatten!(resid, resids)
+    return nothing
+end
+
+@views function __mirk_loss_collocation!(resid, u, p, y, mesh, residual, cache,
+        trait::NoDiffCacheNeeded, constraint::Val{true})
+    y_ = recursive_unflatten!(y, u)
+    N = length(y)
+    resids = [r for r in residual[(N + 2):end]]
+    Φ!(resids, cache, y_, u, trait, constraint)
     recursive_flatten!(resid, resids)
     return nothing
 end
@@ -383,8 +528,23 @@ end
     return mapreduce(vec, vcat, resids)
 end
 
-function __construct_problem(cache::MIRKCache{iip}, y, loss_bc::BC, loss_collocation::C,
-        loss::LF, ::StandardBVProblem) where {iip, BC, C, LF}
+################################################################################
+# Dispatch for problems with constraints
+################################################################################
+
+@views function __mirk_loss_constraint!(
+        resid, u, p, y, mesh, residual, cache, trait::DiffCacheNeeded)
+    y_ = recursive_unflatten!(y, u)
+    N = length(y)
+    resids = [get_tmp(r, u) for r in residual[1:N]]
+    cache.prob.f.inequality.(resids, y_, nothing)
+    recursive_flatten!(resid, resids)
+    return nothing
+end
+
+function __construct_problem(
+        cache::MIRKCache{iip}, y, loss_bc::BC, loss_collocation::C, loss::LF,
+        ::StandardBVProblem, constraint::Val{false}) where {iip, BC, C, LF}
     (; jac_alg) = cache.alg
     (; bc_diffmode) = jac_alg
     N = length(cache.mesh)
@@ -463,6 +623,95 @@ function __construct_problem(cache::MIRKCache{iip}, y, loss_bc::BC, loss_colloca
         resid_prototype, y, cache.p, cache.M, N)
 end
 
+# Dispatch for problems with constraints
+function __construct_problem(
+        cache::MIRKCache{iip}, y, loss_bc::BC, loss_collocation::C, loss_constraint::LC,
+        loss::LF, ::StandardBVProblem, constraint::Val{true}) where {iip, BC, C, LC, LF}
+    (; jac_alg) = cache.alg
+    (; bc_diffmode) = jac_alg
+    (; f_prototype) = cache
+    N = length(cache.mesh)
+
+    resid_bc = cache.bcresid_prototype
+    L = length(resid_bc)
+    resid_collocation = safe_similar(y, length(f_prototype) * (N - 1))
+    resid_prototype = safe_similar(y, cache.M * N)
+
+    cache_bc = if iip
+        DI.prepare_jacobian(loss_bc, resid_bc, bc_diffmode, y, Constant(cache.p))
+    else
+        DI.prepare_jacobian(loss_bc, bc_diffmode, y, Constant(cache.p))
+    end
+
+    nonbc_diffmode = if jac_alg.nonbc_diffmode isa AutoSparse
+        if L < cache.M
+            # For underdetermined problems we use sparse since we don't have banded qr
+            J_full_band = nothing
+            sparse_jacobian_prototype = __generate_sparse_jacobian_prototype(
+                cache, cache.problem_type, y, y, cache.M, N)
+        else
+            J_full_band = BandedMatrix(
+                Ones{eltype(y)}(L + length(f_prototype) * (N - 1), cache.M * N),
+                (L + 1, cache.M + max(cache.M - L, 0)))
+            sparse_jacobian_prototype = BandedMatrix(
+                Ones{eltype(y)}(length(f_prototype) * (N - 1), cache.M * N), (
+                    1, 2cache.M - 1))
+        end
+        AutoSparse(get_dense_ad(jac_alg.nonbc_diffmode);
+            sparsity_detector = ADTypes.KnownJacobianSparsityDetector(sparse_jacobian_prototype),
+            coloring_algorithm = __default_coloring_algorithm(jac_alg.nonbc_diffmode))
+    else
+        J_full_band = nothing
+        jac_alg.nonbc_diffmode
+    end
+
+    cache_collocation = if iip
+        DI.prepare_jacobian(
+            loss_collocation, resid_collocation, nonbc_diffmode, y, Constant(cache.p))
+    else
+        DI.prepare_jacobian(loss_collocation, nonbc_diffmode, y, Constant(cache.p))
+    end
+
+    J_bc = if iip
+        DI.jacobian(loss_bc, resid_bc, cache_bc, bc_diffmode, y, Constant(cache.p))
+    else
+        DI.jacobian(loss_bc, cache_bc, bc_diffmode, y, Constant(cache.p))
+    end
+    J_c = if iip
+        DI.jacobian(loss_collocation, resid_collocation, cache_collocation,
+            nonbc_diffmode, y, Constant(cache.p))
+    else
+        DI.jacobian(
+            loss_collocation, cache_collocation, nonbc_diffmode, y, Constant(cache.p))
+    end
+
+    if J_full_band === nothing
+        jac_prototype = vcat(J_bc, J_c)
+    else
+        jac_prototype = AlmostBandedMatrix{eltype(cache)}(J_full_band, J_bc)
+    end
+
+    constraint_diffmode = AutoSparse(AutoForwardDiff();
+        sparsity_detector = BoundaryValueDiffEqCore.TracerLocalSparsityDetector(),
+        coloring_algorithm = __default_coloring_algorithm(jac_alg.bc_diffmode))
+
+    cache_constraint = DI.prepare_jacobian(
+        loss_constraint, resid_prototype, constraint_diffmode, y, Constant(cache.p))
+    J_constraint = DI.jacobian(loss_constraint, resid_prototype, cache_constraint,
+        constraint_diffmode, y, Constant(cache.p))
+    jac_prototype = vcat(J_constraint, jac_prototype)
+
+    jac = @closure (J,
+        u,
+        p) -> __mirk_mpoint_jacobian!(
+        J, J_c, u, bc_diffmode, nonbc_diffmode, constraint_diffmode, cache_bc,
+        cache_collocation, cache_constraint, loss_bc, loss_collocation,
+        loss_constraint, resid_bc, resid_collocation, resid_prototype, L, cache.p)
+
+    return __construct_internal_problem(cache.prob, cache.alg, loss, jac, jac_prototype,
+        resid_prototype, y, cache.p, cache.M, N)
+end
+
 function __mirk_mpoint_jacobian!(
         J, _, x, bc_diffmode, nonbc_diffmode, bc_diffcache, nonbc_diffcache, loss_bc::BC,
         loss_collocation::C, resid_bc, resid_collocation, L::Int, p) where {BC, C}
@@ -473,6 +722,34 @@ function __mirk_mpoint_jacobian!(
     return nothing
 end
 
+function __mirk_mpoint_jacobian!(J::AlmostBandedMatrix, J_c, x, bc_diffmode, nonbc_diffmode,
+        bc_diffcache, nonbc_diffcache, inequality_diffcache,
+        loss_bc::BC, loss_collocation::C, loss_inequality, resid_bc,
+        resid_collocation, resid_prototype, L::Int, p) where {BC, C}
+    J_bc = fillpart(J)
+    DI.jacobian!(loss_collocation, resid_collocation, J_c,
+        nonbc_diffcache, nonbc_diffmode, x, Constant(p))
+    DI.jacobian!(loss_bc, resid_bc, J_bc, bc_diffcache, bc_diffmode, x, Constant(p))
+    exclusive_bandpart(J) .= J_c
+    finish_part_setindex!(J)
+    return nothing
+end
+
+function __mirk_mpoint_jacobian!(J, _, x, bc_diffmode, nonbc_diffmode, constraint_diffmode,
+        bc_diffcache, nonbc_diffcache, constraint_diffcache,
+        loss_bc::BC, loss_collocation::C, loss_inequality, resid_bc,
+        resid_collocation, resid_prototype, L::Int, p) where {BC, C}
+    N = length(x)
+    # The Jacobian of constraints
+    DI.jacobian!(loss_inequality, resid_prototype, @view(J[1:N, :]),
+        constraint_diffcache, constraint_diffmode, x, Constant(p))
+    DI.jacobian!(loss_bc, resid_bc, @view(J[(N + 1):(N + L), :]),
+        bc_diffcache, bc_diffmode, x, Constant(p))
+    DI.jacobian!(loss_collocation, resid_collocation, @view(J[(N + L + 1):end, :]),
+        nonbc_diffcache, nonbc_diffmode, x, Constant(p))
+    return nothing
+end
+
 function __mirk_mpoint_jacobian!(J::AlmostBandedMatrix, J_c, x, bc_diffmode, nonbc_diffmode,
         bc_diffcache, nonbc_diffcache, loss_bc::BC, loss_collocation::C,
         resid_bc, resid_collocation, L::Int, p) where {BC, C}
@@ -505,8 +782,9 @@ function __mirk_mpoint_jacobian(
     return J
 end
 
-function __construct_problem(cache::MIRKCache{iip}, y, loss_bc::BC, loss_collocation::C,
-        loss::LF, ::TwoPointBVProblem) where {iip, BC, C, LF}
+function __construct_problem(
+        cache::MIRKCache{iip}, y, loss_bc::BC, loss_collocation::C, loss::LF,
+        ::TwoPointBVProblem, constraint::Val{false}) where {iip, BC, C, LF}
     (; jac_alg) = cache.alg
     N = length(cache.mesh)