Skip to content

Commit 2ea86c9

Browse files
committed
Add support for Jacobian-vector products
1 parent 52f854c commit 2ea86c9

File tree

11 files changed

+589
-137
lines changed

11 files changed

+589
-137
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ NLSolversBase.jl is the core, common dependency of several packages in the [Juli
1919
The package aims at establishing common ground for [Optim.jl](https://github.com/JuliaNLSolvers/Optim.jl), [LineSearches.jl](https://github.com/JuliaNLSolvers/LineSearches.jl), and [NLsolve.jl](https://github.com/JuliaNLSolvers/NLsolve.jl). The common ground is mainly the types used to hold objective related callables, information about the objectives, and an interface to interact with these types.
2020

2121
## NDifferentiable
22-
There are currently three main types: `NonDifferentiable`, `OnceDifferentiable`, and `TwiceDifferentiable`. There's also a more experimental `TwiceDifferentiableHV` for optimization algorithms that use Hessian-vector products. An `NDifferentiable` instance can be used to hold relevant functions for
22+
There are currently three main types: `NonDifferentiable`, `OnceDifferentiable`, and `TwiceDifferentiable`. An `NDifferentiable` instance can be used to hold relevant functions for
2323

2424
- Optimization: $F : \mathbb{R}^N \to \mathbb{R}$
2525
- Solving systems of equations: $F : \mathbb{R}^N \to \mathbb{R}^N$

src/NLSolversBase.jl

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,23 @@ export AbstractObjective,
99
NonDifferentiable,
1010
OnceDifferentiable,
1111
TwiceDifferentiable,
12-
TwiceDifferentiableHV,
1312
value,
1413
value!,
1514
value_gradient!,
1615
value_jacobian!,
16+
value_jvp!,
1717
gradient,
1818
gradient!,
1919
jacobian,
2020
jacobian!,
21+
jvp,
22+
jvp!,
2123
hessian,
2224
hessian!,
2325
value!!,
2426
value_gradient!!,
2527
value_jacobian!!,
2628
hessian!!,
27-
hv_product,
2829
hv_product!,
2930
only_fg!,
3031
only_fgh!,
@@ -38,6 +39,7 @@ export AbstractObjective,
3839
clear!,
3940
f_calls,
4041
g_calls,
42+
jvp_calls,
4143
h_calls,
4244
hv_calls
4345

@@ -51,12 +53,11 @@ include("objective_types/abstract.jl")
5153
include("objective_types/nondifferentiable.jl")
5254
include("objective_types/oncedifferentiable.jl")
5355
include("objective_types/twicedifferentiable.jl")
54-
include("objective_types/twicedifferentiablehv.jl")
5556
include("objective_types/incomplete.jl")
5657
include("objective_types/constraints.jl")
5758
include("interface.jl")
5859

5960
NonDifferentiable(f::OnceDifferentiable, x::AbstractArray) = NonDifferentiable(f.f, x, copy(f.F))
6061
NonDifferentiable(f::TwiceDifferentiable, x::AbstractArray) = NonDifferentiable(f.f, x, copy(f.F))
61-
NonDifferentiable(f::TwiceDifferentiableHV, x::AbstractArray) = NonDifferentiable(f.f, x, copy(f.F))
62+
6263
end # module

src/interface.jl

Lines changed: 139 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,19 @@ value(obj::AbstractObjective) = obj.F
103103
gradient(obj::AbstractObjective) = obj.DF
104104
"Get the most recently evaluated Jacobian of `obj`."
105105
jacobian(obj::AbstractObjective) = obj.DF
106+
"""
107+
jvp(obj::Union{OnceDifferentiable,TwiceDifferentiable})
108+
109+
Get the most recently evaluated Jacobian-vector product of `obj`.
110+
111+
!!! warn
112+
Generally, it is unsafe to rely on the state of `obj`.
113+
This function should only be used to, e.g., check whether the
114+
most recently evaluated Jacobian-vector product is finite.
115+
In most cases one should use [`jvp!(obj, x, v)`](@ref)
116+
instead of this function.
117+
"""
118+
jvp(obj::Union{OnceDifferentiable, TwiceDifferentiable}) = obj.JVP
106119
"Get the `i`th element of the most recently evaluated gradient of `obj`."
107120
gradient(obj::AbstractObjective, i::Integer) = obj.DF[i]
108121
"Get the most recently evaluated Hessian of `obj`"
@@ -153,6 +166,92 @@ function jacobian(obj::AbstractObjective, x)
153166
return newdf
154167
end
155168

169+
"""
170+
jvp!(obj::Union{OnceDifferentiable, TwiceDifferentiable}, x::AbstractArray, v::AbstractArray)
171+
172+
Return the Jacobian-vector product of the objective function `obj` at point `x` with tangents `v`,
173+
and cache the results in `obj`.
174+
175+
!!! note
176+
This function does use cached results if available.
177+
"""
178+
function jvp!(obj::Union{OnceDifferentiable, TwiceDifferentiable}, x::AbstractArray, v::AbstractArray)
179+
if x != obj.x_jvp || v != obj.v_jvp
180+
jvp!!(obj, x, v)
181+
end
182+
return obj.JVP
183+
end
184+
185+
"""
186+
jvp!!(obj::Union{OnceDifferentiable, TwiceDifferentiable}, x::AbstractArray, v::AbstractArray)
187+
188+
Return the Jacobian-vector product of the objective function `obj` at point `x` with tangents `v`,
189+
and cache the results in `obj`.
190+
191+
!!! note
192+
This function does not use cached results but forces reevaluation of the Jacobian-vector product.
193+
"""
194+
function jvp!!(obj::Union{OnceDifferentiable, TwiceDifferentiable}, x::AbstractArray, v::AbstractArray)
195+
if obj.JVP isa Real
196+
obj.JVP = obj.jvp(x, v)
197+
else
198+
obj.jvp(obj.JVP, x, v)
199+
end
200+
copyto!(obj.x_jvp, x)
201+
copyto!(obj.v_jvp, v)
202+
obj.jvp_calls += 1
203+
return obj.JVP
204+
end
205+
206+
"""
207+
value_jvp!(obj::Union{OnceDifferentiable, TwiceDifferentiable}, x::AbstractArray, v::AbstractArray)
208+
209+
Return the value and the Jacobian-vector product of the objective function `obj` at point `x` with tangents `v`,
210+
and cache the results in `obj`.
211+
212+
!!! note
213+
This function does use cached results if available.
214+
"""
215+
function value_jvp!(obj::Union{OnceDifferentiable, TwiceDifferentiable}, x::AbstractArray, v::AbstractArray)
216+
if x != obj.x_f
217+
if x != obj.x_jvp || v != obj.v_jvp
218+
# Both value and Jacobian-vector product have to be recomputed
219+
value_jvp!!(obj, x, v)
220+
else
221+
# Only value has to be recomputed
222+
value!!(obj, x)
223+
end
224+
elseif x != obj.x_jvp || v != obj.v_jvp
225+
jvp!!(obj, x, v)
226+
end
227+
return obj.F, obj.JVP
228+
end
229+
230+
"""
231+
value_jvp!!(obj::Union{OnceDifferentiable, TwiceDifferentiable}, x::AbstractArray, v::AbstractArray)
232+
233+
Return the value and the Jacobian-vector product of the objective function `obj` at point `x` with tangents `v`,
234+
and cache the results in `obj`.
235+
236+
!!! note
237+
This function does not use cached results but forces reevaluation of the Jacobian-vector product.
238+
"""
239+
function value_jvp!!(obj::Union{OnceDifferentiable,TwiceDifferentiable}, x::AbstractArray, v::AbstractArray)
240+
if obj.F isa Real
241+
y, ty = obj.fjvp(x, v)
242+
obj.F = y
243+
obj.JVP = ty
244+
else
245+
obj.fjvp(obj.F, obj.JVP, x, v)
246+
end
247+
copyto!(obj.x_f, x)
248+
copyto!(obj.x_jvp, x)
249+
copyto!(obj.v_jvp, v)
250+
obj.f_calls += 1
251+
obj.jvp_calls += 1
252+
return obj.F, obj.JVP
253+
end
254+
156255
value(obj::NonDifferentiable{TF, TX}, x) where {TF<:AbstractArray, TX} = value(obj, copy(obj.F), x)
157256
value(obj::OnceDifferentiable{TF, TDF, TX}, x) where {TF<:AbstractArray, TDF, TX} = value(obj, copy(obj.F), x)
158257
function value(obj::AbstractObjective, F, x)
@@ -170,6 +269,25 @@ function value!!(obj::AbstractObjective, F, x)
170269
F
171270
end
172271

272+
function hv_product!(obj::TwiceDifferentiable, x, v)
273+
if x != obj.x_hv || v != obj.v_hv
274+
hv_product!!(obj, x, v)
275+
end
276+
obj.Hv
277+
end
278+
function hv_product!!(obj::TwiceDifferentiable, x, v)
279+
if obj.hv === nothing
280+
H = hessian!(obj, x)
281+
LinearAlgebra.mul!(obj.Hv, H, v)
282+
else
283+
obj.hv_calls += 1
284+
obj.hv(obj.Hv, x, v)
285+
end
286+
copyto!(obj.x_hv, x)
287+
copyto!(obj.v_hv, v)
288+
return obj.Hv
289+
end
290+
173291
function _clear_f!(d::AbstractObjective)
174292
d.f_calls = 0
175293
if d.F isa AbstractArray
@@ -188,6 +306,18 @@ function _clear_df!(d::AbstractObjective)
188306
nothing
189307
end
190308

309+
function _clear_jvp!(d::AbstractObjective)
310+
d.jvp_calls = 0
311+
if d.JVP isa AbstractArray
312+
fill!(d.JVP, NaN)
313+
else
314+
d.JVP = NaN
315+
end
316+
fill!(d.x_jvp, NaN)
317+
fill!(d.v_jvp, NaN)
318+
nothing
319+
end
320+
191321
function _clear_h!(d::AbstractObjective)
192322
d.h_calls = 0
193323
fill!(d.H, NaN)
@@ -208,28 +338,25 @@ clear!(d::NonDifferentiable) = _clear_f!(d)
208338
function clear!(d::OnceDifferentiable)
209339
_clear_f!(d)
210340
_clear_df!(d)
341+
_clear_jvp!(d)
211342
nothing
212343
end
213344

214345
function clear!(d::TwiceDifferentiable)
215346
_clear_f!(d)
216347
_clear_df!(d)
348+
_clear_jvp!(d)
217349
_clear_h!(d)
218-
nothing
219-
end
220-
221-
function clear!(d::TwiceDifferentiableHV)
222-
_clear_f!(d)
223-
_clear_df!(d)
224350
_clear_hv!(d)
225351
nothing
226352
end
227353

354+
f_calls(d::Union{NonDifferentiable, OnceDifferentiable, TwiceDifferentiable}) = d.f_calls
228355
g_calls(d::NonDifferentiable) = 0
356+
g_calls(d::Union{OnceDifferentiable, TwiceDifferentiable}) = d.df_calls
357+
jvp_calls(d::NonDifferentiable) = 0
358+
jvp_calls(d::Union{OnceDifferentiable, TwiceDifferentiable}) = d.jvp_calls
229359
h_calls(d::Union{NonDifferentiable, OnceDifferentiable}) = 0
230-
f_calls(d) = d.f_calls
231-
g_calls(d) = d.df_calls
232-
h_calls(d) = d.h_calls
233-
hv_calls(d) = 0
234-
h_calls(d::TwiceDifferentiableHV) = 0
235-
hv_calls(d::TwiceDifferentiableHV) = d.hv_calls
360+
h_calls(d::TwiceDifferentiable) = d.h_calls
361+
hv_calls(d::Union{NonDifferentiable, OnceDifferentiable}) = 0
362+
hv_calls(d::TwiceDifferentiable) = d.hv_calls

src/objective_types/abstract.jl

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,45 @@ function make_fdf(x, F::Number, f, g!)
1515
end
1616
end
1717

18+
# Given Julia functions of the gradient/Jacobian, create a function that calculates the Jacobian-vector product.
19+
function make_jvp(x::AbstractArray, F::AbstractArray, j!)
20+
let j! = j!, jx = alloc_DF(x, F)
21+
function jvp!(jvp, x, v)
22+
j!(jx, x)
23+
LinearAlgebra.mul!(jvp, jx, v)
24+
return jvp
25+
end
26+
end
27+
end
28+
function make_jvp(x::AbstractArray, F::Number, g!)
29+
let g! = g!, gx = alloc_DF(x, F)
30+
function jvp(x, v)
31+
g!(gx, x)
32+
return LinearAlgebra.dot(gx, v)
33+
end
34+
end
35+
end
36+
37+
# Given a Julia function of the objective function and its gradient/Jacobian, create a function that calculates the function value and the Jacobian-vector product.
38+
function make_fjvp(x::AbstractArray, F::AbstractArray, fj!)
39+
let fj! = fj!, jx = alloc_DF(x, F)
40+
function fjvp!(fx, jvp, x, v)
41+
fj!(fx, jx, x)
42+
LinearAlgebra.mul!(jvp, jx, v)
43+
return fx, jvp
44+
end
45+
end
46+
end
47+
function make_fjvp(x::AbstractArray, F::Number, fg!)
48+
let fg! = fg!, gx = alloc_DF(x, F)
49+
function jvp(x, v)
50+
fx = fg!(gx, x)
51+
return fx, LinearAlgebra.dot(gx, v)
52+
end
53+
end
54+
end
55+
56+
1857
# Initialize an n-by-n Jacobian
1958
alloc_DF(x, F) = eltype(x)(NaN) .* vec(F) .* vec(x)'
2059

0 commit comments

Comments
 (0)