diff --git a/docs/src/reference.md b/docs/src/reference.md
index c9990d96..044090f7 100644
--- a/docs/src/reference.md
+++ b/docs/src/reference.md
@@ -9,4 +9,6 @@
 CUTEst.cons_coord
 CUTEst.cons_coord!
 CUTEst.consjac
+CUTEst.prepare_input!
+CUTEst.prepare_output!
 ```
diff --git a/src/julia_interface.jl b/src/julia_interface.jl
index 3187ca0d..c448e57c 100644
--- a/src/julia_interface.jl
+++ b/src/julia_interface.jl
@@ -2,6 +2,45 @@ export cons_coord, cons_coord!, consjac
 
 using NLPModels, SparseArrays
 
+"""
+    prepare_input!(workspace::Vector{T}, x::AbstractVector{S}) where {T, S}
+
+Prepare input vector `x` for use with CUTEst functions, using preallocated workspace
+to avoid allocations when type conversion is needed.
+
+Returns the input vector directly if no conversion is needed, or the workspace vector
+with converted values if type conversion is required.
+"""
+@inline function prepare_input!(workspace::Vector{T}, x::AbstractVector{S}) where {T, S}
+  if S === T && typeof(x) <: Vector{T}
+    return x
+  else
+    resize!(workspace, length(x))
+    workspace .= x
+    return workspace
+  end
+end
+
+"""
+    prepare_output!(workspace::Vector{T}, target::AbstractVector{S}, source::AbstractVector{T}) where {T, S}
+
+Prepare output by copying from source to target, using workspace for type conversion if needed.
+"""
+@inline function prepare_output!(
+  workspace::Vector{T},
+  target::AbstractVector{S},
+  source::AbstractVector{T},
+) where {T, S}
+  if S === T && typeof(target) <: Vector{T}
+    target .= source
+  else
+    resize!(workspace, length(source))
+    workspace .= source
+    target .= workspace
+  end
+  return target
+end
+
 function NLPModels.obj(nlp::CUTEstModel{T}, x::StrideOneVector{T}) where {T}
   @lencheck nlp.meta.nvar x
   if nlp.meta.ncon > 0
@@ -40,10 +79,16 @@ end
 
 function NLPModels.grad!(nlp::CUTEstModel{T}, x::AbstractVector, g::AbstractVector) where {T}
   @lencheck nlp.meta.nvar x g
-  x_ = Vector{T}(x)
-  g_ = Vector{T}(g)
-  grad!(nlp, x_, g_)
-  g .= g_
+
+  x_prepared = prepare_input!(nlp.input_workspace, x)
+
+  if typeof(g) <: Vector{T}
+    grad!(nlp, x_prepared, g)
+  else
+    resize!(nlp.output_workspace, length(g))
+    grad!(nlp, x_prepared, view(nlp.output_workspace, 1:length(g)))
+    g .= view(nlp.output_workspace, 1:length(g))
+  end
 end
 
 function NLPModels.objcons!(
@@ -105,13 +150,15 @@ end
 
 function NLPModels.objgrad!(nlp::CUTEstModel{T}, x::AbstractVector, g::StrideOneVector{T}) where {T}
   @lencheck nlp.meta.nvar x g
-  objgrad!(nlp, Vector{T}(x), g)
+  x_prepared = prepare_input!(nlp.input_workspace, x)
+  objgrad!(nlp, x_prepared, g)
 end
 
 function NLPModels.objgrad!(nlp::CUTEstModel{T}, x::AbstractVector, g::AbstractVector) where {T}
   @lencheck nlp.meta.nvar x g
+  x_prepared = prepare_input!(nlp.input_workspace, x)
   gc = nlp.workspace_nvar
-  f, _ = objgrad!(nlp, Vector{T}(x), gc)
+  f, _ = objgrad!(nlp, x_prepared, gc)
   g .= gc
   return f, g
 end
@@ -179,15 +226,30 @@ function cons_coord!(
   @lencheck nlp.meta.nvar x
   @lencheck nlp.meta.ncon c
   @lencheck nlp.meta.nnzj rows cols vals
-  rows_ = Vector{Cint}(undef, nlp.meta.nnzj)
-  cols_ = Vector{Cint}(undef, nlp.meta.nnzj)
-  vals_ = Vector{T}(undef, nlp.meta.nnzj)
-  c_ = Vector{T}(undef, nlp.meta.ncon)
-  cons_coord!(nlp, Vector{T}(x), c_, rows_, cols_, vals_)
-  rows .= rows_
-  cols .= cols_
-  vals .= vals_
-  c .= c_
+
+  nnzj = nlp.meta.nnzj
+  if length(nlp.jac_coord_rows) < nnzj
+    resize!(nlp.jac_coord_rows, nnzj)
+    resize!(nlp.jac_coord_cols, nnzj)
+    resize!(nlp.jac_coord_vals, nnzj)
+  end
+  if length(nlp.cons_vals) < nlp.meta.ncon
+    resize!(nlp.cons_vals, nlp.meta.ncon)
+  end
+
+  cons_coord!(
+    nlp,
+    Vector{T}(x),
+    view(nlp.cons_vals, 1:nlp.meta.ncon),
+    view(nlp.jac_coord_rows, 1:nnzj),
+    view(nlp.jac_coord_cols, 1:nnzj),
+    view(nlp.jac_coord_vals, 1:nnzj),
+  )
+
+  rows .= view(nlp.jac_coord_rows, 1:nnzj)
+  cols .= view(nlp.jac_coord_cols, 1:nnzj)
+  vals .= view(nlp.jac_coord_vals, 1:nnzj)
+  c .= view(nlp.cons_vals, 1:nlp.meta.ncon)
   return c, rows, cols, vals
 end
 
@@ -209,11 +271,32 @@ Usage:
 """
 function cons_coord(nlp::CUTEstModel{T}, x::StrideOneVector{T}) where {T}
   @lencheck nlp.meta.nvar x
-  c = Vector{T}(undef, nlp.meta.ncon)
-  rows = Vector{Cint}(undef, nlp.meta.nnzj)
-  cols = Vector{Cint}(undef, nlp.meta.nnzj)
-  vals = Vector{T}(undef, nlp.meta.nnzj)
-  cons_coord!(nlp, x, c, rows, cols, vals)
+
+  nnzj = nlp.meta.nnzj
+  if length(nlp.jac_coord_rows) < nnzj
+    resize!(nlp.jac_coord_rows, nnzj)
+    resize!(nlp.jac_coord_cols, nnzj)
+    resize!(nlp.jac_coord_vals, nnzj)
+  end
+  if length(nlp.cons_vals) < nlp.meta.ncon
+    resize!(nlp.cons_vals, nlp.meta.ncon)
+  end
+
+  cons_coord!(
+    nlp,
+    x,
+    view(nlp.cons_vals, 1:nlp.meta.ncon),
+    view(nlp.jac_coord_rows, 1:nnzj),
+    view(nlp.jac_coord_cols, 1:nnzj),
+    view(nlp.jac_coord_vals, 1:nnzj),
+  )
+
+  c = copy(view(nlp.cons_vals, 1:nlp.meta.ncon))
+  rows = copy(view(nlp.jac_coord_rows, 1:nnzj))
+  cols = copy(view(nlp.jac_coord_cols, 1:nnzj))
+  vals = copy(view(nlp.jac_coord_vals, 1:nnzj))
+
+  return c, rows, cols, vals
 end
 
 function cons_coord(nlp::CUTEstModel{T}, x::AbstractVector) where {T}
@@ -630,9 +713,19 @@ function NLPModels.hess_coord!(
   @lencheck nlp.meta.nvar x
   @lencheck nlp.meta.ncon y
   @lencheck nlp.meta.nnzh vals
-  vals_ = Vector{T}(undef, nlp.meta.nnzh)
-  NLPModels.hess_coord!(nlp, Vector{T}(x), convert(Vector{T}, y), vals_, obj_weight = obj_weight)
-  vals .= vals_
+
+  if length(nlp.hess_coord_vals) < nlp.meta.nnzh
+    resize!(nlp.hess_coord_vals, nlp.meta.nnzh)
+  end
+
+  NLPModels.hess_coord!(
+    nlp,
+    Vector{T}(x),
+    convert(Vector{T}, y),
+    view(nlp.hess_coord_vals, 1:nlp.meta.nnzh),
+    obj_weight = obj_weight,
+  )
+  vals .= view(nlp.hess_coord_vals, 1:nlp.meta.nnzh)
   return vals
 end
 
diff --git a/src/model.jl b/src/model.jl
index 02728eb9..e527a866 100644
--- a/src/model.jl
+++ b/src/model.jl
@@ -15,6 +15,17 @@ mutable struct CUTEstModel{T} <: AbstractNLPModel{T, Vector{T}}
   workspace_nvar::Vector{T}
   workspace_ncon::Vector{T}
 
+  jac_coord_rows::Vector{Cint}
+  jac_coord_cols::Vector{Cint}
+  jac_coord_vals::Vector{T}
+  hess_coord_vals::Vector{T}
+
+  cons_vals::Vector{T}
+  cons_nln_vals::Vector{T}
+
+  input_workspace::Vector{T}
+  output_workspace::Vector{T}
+
   Jval::Vector{T}
   Jvar::Vector{Cint}
 
@@ -297,6 +308,18 @@ function CUTEstModel{T}(
   workspace_nvar = Vector{T}(undef, nvar)
   workspace_ncon = Vector{T}(undef, ncon)
 
+  jac_coord_rows = Vector{Cint}(undef, nnzj)
+  jac_coord_cols = Vector{Cint}(undef, nnzj)
+  jac_coord_vals = Vector{T}(undef, nnzj)
+  hess_coord_vals = Vector{T}(undef, nnzh)
+
+  cons_vals = Vector{T}(undef, ncon)
+  nnln = count(.!linear)
+  cons_nln_vals = Vector{T}(undef, nnln)
+
+  input_workspace = Vector{T}(undef, nvar)
+  output_workspace = Vector{T}(undef, max(nvar, ncon))
+
   fclose(T, libsif, funit, status)
   cutest_error(status[])
 
@@ -330,6 +353,14 @@ function CUTEstModel{T}(
     clinvals,
     workspace_nvar,
     workspace_ncon,
+    jac_coord_rows,
+    jac_coord_cols,
+    jac_coord_vals,
+    hess_coord_vals,
+    cons_vals,
+    cons_nln_vals,
+    input_workspace,
+    output_workspace,
     Jval,
     Jvar,
     libsif,
diff --git a/test/test_comprehensive.jl b/test/test_comprehensive.jl
new file mode 100644
index 00000000..b2ea722b
--- /dev/null
+++ b/test/test_comprehensive.jl
@@ -0,0 +1,83 @@
+using CUTEst
+using BenchmarkTools
+using LinearAlgebra
+using NLPModels
+
+# Test with an unconstrained problem first
+println("Testing with unconstrained problem (ROSENBR):")
+nlp1 = CUTEstModel{Float64}("ROSENBR")
+x0 = nlp1.meta.x0
+
+println("Problem loaded: $(nlp1.meta.name) ($(nlp1.meta.nvar) vars, $(nlp1.meta.ncon) cons)")
+vals_h = Vector{Float64}(undef, nlp1.meta.nnzh)
+hess_coord!(nlp1, x0, vals_h)
+
+b_hess = @benchmark hess_coord!($nlp1, $x0, $vals_h) samples=100 evals=5
+println(
+  "hess_coord!: $(b_hess.allocs) allocations, $(BenchmarkTools.prettytime(median(b_hess).time))",
+)
+
+finalize(nlp1)
+
+# Test with a constrained problem
+println("\nTesting with constrained problem (HS6):")
+nlp2 = CUTEstModel{Float64}("HS6")
+x0 = nlp2.meta.x0
+
+println("Problem loaded: $(nlp2.meta.name) ($(nlp2.meta.nvar) vars, $(nlp2.meta.ncon) cons)")
+println("nnzj: $(nlp2.meta.nnzj), nnzh: $(nlp2.meta.nnzh)")
+
+# Test basic functionality
+f = obj(nlp2, x0)
+g = similar(x0)
+grad!(nlp2, x0, g)
+println("obj/grad: f = $(round(f, digits=6)), ||g|| = $(round(norm(g), digits=6))")
+
+# Test constraint functions
+c = Vector{Float64}(undef, nlp2.meta.ncon)
+cons!(nlp2, x0, c)
+println("constraints: ||c|| = $(round(norm(c), digits=6))")
+
+# Test cons_coord - this should show major allocation improvements
+println("\nTesting cons_coord allocation improvements:")
+c1, rows1, cols1, vals1 = cons_coord(nlp2, x0)
+
+# Benchmark cons_coord
+b_cons = @benchmark cons_coord($nlp2, $x0) samples=100 evals=5
+println(
+  "cons_coord: $(b_cons.allocs) allocations, $(BenchmarkTools.prettytime(median(b_cons).time))",
+)
+println("Memory: $(BenchmarkTools.prettymemory(median(b_cons).memory))")
+println("Returned $(length(vals1)) Jacobian elements")
+
+# Test cons_coord!
+rows = Vector{Cint}(undef, nlp2.meta.nnzj)
+cols = Vector{Cint}(undef, nlp2.meta.nnzj)
+vals = Vector{Float64}(undef, nlp2.meta.nnzj)
+c_out = Vector{Float64}(undef, nlp2.meta.ncon)
+
+b_cons_inplace = @benchmark cons_coord!($nlp2, $x0, $c_out, $rows, $cols, $vals) samples=100 evals=5
+println(
+  "cons_coord!: $(b_cons_inplace.allocs) allocations, $(BenchmarkTools.prettytime(median(b_cons_inplace).time))",
+)
+
+# Test type conversion
+println("\nTesting type conversion improvements:")
+x0_f32 = Float32.(x0)
+g_f32 = Vector{Float32}(undef, nlp2.meta.nvar)
+
+b_grad_conv = @benchmark grad!($nlp2, $x0_f32, $g_f32) samples=100 evals=5
+println("grad! with Float32->Float64 conversion: $(b_grad_conv.allocs) allocations")
+println("Time: $(BenchmarkTools.prettytime(median(b_grad_conv).time))")
+
+# Test hess_coord with constraints
+vals_h2 = Vector{Float64}(undef, nlp2.meta.nnzh)
+y = zeros(nlp2.meta.ncon)
+hess_coord!(nlp2, x0, y, vals_h2)
+
+b_hess2 = @benchmark hess_coord!($nlp2, $x0, $y, $vals_h2) samples=100 evals=5
+println(
+  "hess_coord! (constrained): $(b_hess2.allocs) allocations, $(BenchmarkTools.prettytime(median(b_hess2).time))",
+)
+
+finalize(nlp2)