Commit 550624f

Compute JVP in line searches

1 parent 67320a2 commit 550624f

18 files changed: +146 / -121 lines

.github/workflows/CI.yml

Lines changed: 2 additions & 2 deletions
@@ -11,8 +11,8 @@ jobs:
   strategy:
     matrix:
       version:
-        - "min"
-        - "lts"
+        # - "min"
+        # - "lts"
         - "1"
       os:
         - ubuntu-latest

Project.toml

Lines changed: 7 additions & 3 deletions
@@ -3,6 +3,7 @@ uuid = "429524aa-4258-5aef-a3af-852621145aeb"
 version = "1.14.0"
 
 [deps]
+ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
 EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
@@ -35,7 +36,7 @@ LineSearches = "7.4.0"
 LinearAlgebra = "<0.0.1, 1.6"
 MathOptInterface = "1.17"
 Measurements = "2.14.1"
-NLSolversBase = "7.9.0"
+NLSolversBase = "8"
 NaNMath = "0.3.2, 1"
 OptimTestProblems = "2.0.3"
 PositiveFactorizations = "0.2.2"
@@ -50,7 +51,6 @@ Test = "<0.0.1, 1.6"
 julia = "1.10"
 
 [extras]
-ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7"
@@ -68,4 +68,8 @@ StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test", "Aqua", "Distributions", "ExplicitImports", "JET", "MathOptInterface", "Measurements", "OptimTestProblems", "Random", "RecursiveArrayTools", "StableRNGs", "LineSearches", "NLSolversBase", "PositiveFactorizations", "ReverseDiff", "ADTypes"]
+test = ["Test", "Aqua", "Distributions", "ExplicitImports", "JET", "MathOptInterface", "Measurements", "OptimTestProblems", "Random", "RecursiveArrayTools", "StableRNGs", "LineSearches", "NLSolversBase", "PositiveFactorizations", "ReverseDiff"]
+
+[sources]
+LineSearches = { url = "https://github.com/devmotion/LineSearches.jl.git", rev = "dmw/jvp" }
+NLSolversBase = { url = "https://github.com/devmotion/NLSolversBase.jl.git", rev = "dmw/jvp" }

docs/src/examples/ipnewton_basics.jl

Lines changed: 2 additions & 1 deletion
@@ -22,6 +22,7 @@
 # constraint is unbounded from below or above respectively.
 
 using Optim, NLSolversBase #hide
+import ADTypes #hide
 import NLSolversBase: clear! #hide
 
 # # Constrained optimization with `IPNewton`
@@ -78,7 +79,7 @@ using Test #src
 @test Optim.converged(res) #src
 @test Optim.minimum(res) ≈ 0.25 #src
 
-# Like the rest of Optim, you can also use `autodiff=:forward` and just pass in
+# Like the rest of Optim, you can also use `autodiff=ADTypes.AutoForwardDiff()` and just pass in
 # `fun`.
 
 # If we only want to set lower bounds, use `ux = fill(Inf, 2)`

docs/src/examples/maxlikenlm.jl

Lines changed: 3 additions & 3 deletions
@@ -21,7 +21,7 @@
 
 using Optim, NLSolversBase
 using LinearAlgebra: diag
-using ForwardDiff
+using ADTypes, ForwardDiff
 
 #md # !!! tip
 #md #     Add Optim with the following command at the Julia command prompt:
@@ -152,7 +152,7 @@ end
 func = TwiceDifferentiable(
     vars -> Log_Likelihood(x, y, vars[1:nvar], vars[nvar+1]),
     ones(nvar + 1);
-    autodiff = :forward,
+    autodiff = AutoForwardDiff(),
 );
 
 # The above statement accepts 4 inputs: the x matrix, the dependent
@@ -163,7 +163,7 @@ func = TwiceDifferentiable(
 # the error variance.
 #
 # The `ones(nvar+1)` are the starting values for the parameters and
-# the `autodiff=:forward` command performs forward mode automatic
+# the `autodiff=AutoForwardDiff()` command performs forward mode automatic
 # differentiation.
 #
 # The actual optimization of the likelihood function is accomplished
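Note: a minimal, self-contained sketch of the updated `TwiceDifferentiable` pattern shown in this hunk, using a toy objective (the names `nll` and `θ0` are illustrative and not taken from the example):

    using Optim, NLSolversBase
    using ADTypes: AutoForwardDiff

    # Toy objective standing in for the negative log-likelihood of the example
    nll(θ) = (θ[1] - 1.0)^2 + (θ[2] + 2.0)^2
    θ0 = zeros(2)

    # Gradients and Hessians come from ForwardDiff via the ADTypes backend
    func = TwiceDifferentiable(nll, θ0; autodiff = AutoForwardDiff())
    res = optimize(func, θ0, Newton())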

docs/src/user/gradientsandhessians.md

Lines changed: 7 additions & 6 deletions
@@ -16,9 +16,8 @@ Automatic differentiation techniques are a middle ground between finite differen
 
 Reverse-mode automatic differentiation can be seen as an automatic implementation of the adjoint method mentioned above, and requires a runtime comparable to only one evaluation of ``f``. It is however considerably more complex to implement, requiring to record the execution of the program to then run it backwards, and incurs a larger overhead.
 
-Forward-mode automatic differentiation is supported through the [ForwardDiff.jl](https://github.com/JuliaDiff/ForwardDiff.jl) package by providing the `autodiff=:forward` keyword to `optimize`.
-More generic automatic differentiation is supported thanks to [DifferentiationInterface.jl](https://github.com/JuliaDiff/DifferentiationInterface.jl), by setting `autodiff` to any compatible backend object from [ADTypes.jl](https://github.com/SciML/ADTypes.jl).
-For instance, the user can choose `autodiff=AutoReverseDiff()`, `autodiff=AutoEnzyme()`, `autodiff=AutoMooncake()` or `autodiff=AutoZygote()` for a reverse-mode gradient computation, which is generally faster than forward mode on large inputs.
+Generic automatic differentiation is supported thanks to [DifferentiationInterface.jl](https://github.com/JuliaDiff/DifferentiationInterface.jl), by setting `autodiff` to any compatible backend object from [ADTypes.jl](https://github.com/SciML/ADTypes.jl).
+For instance, the user can choose `autodiff=AutoForwardDiff()` for forward-mode gradient computation or `autodiff=AutoReverseDiff()`, `autodiff=AutoEnzyme()`, `autodiff=AutoMooncake()` or `autodiff=AutoZygote()` for a reverse-mode gradient computation, which is generally faster than forward mode on large inputs.
 Each of these choices requires loading the corresponding package beforehand.
 
 ## Example
@@ -66,14 +65,16 @@ julia> Optim.minimizer(optimize(f, initial_x, BFGS()))
 ```
 Still looks good. Returning to automatic differentiation, let us try both solvers using this
 method. We enable [forward mode](https://github.com/JuliaDiff/ForwardDiff.jl) automatic
-differentiation by using the `autodiff = :forward` keyword.
+differentiation by using the `autodiff = ADTypes.AutoForwardDiff()` keyword.
 ```jlcon
-julia> Optim.minimizer(optimize(f, initial_x, BFGS(); autodiff = :forward))
+julia> using ADTypes: AutoForwardDiff
+
+julia> Optim.minimizer(optimize(f, initial_x, BFGS(); autodiff = AutoForwardDiff()))
 2-element Array{Float64,1}:
  1.0
  1.0
 
-julia> Optim.minimizer(optimize(f, initial_x, Newton(); autodiff = :forward))
+julia> Optim.minimizer(optimize(f, initial_x, Newton(); autodiff = AutoForwardDiff()))
 2-element Array{Float64,1}:
  1.0
  1.0
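Note: the reverse-mode backends mentioned in the updated docs follow the same pattern. A minimal sketch, assuming ReverseDiff.jl is installed; `f` is written out here as the standard Rosenbrock function used in that docs page:

    using Optim
    using ReverseDiff            # the chosen backend package must be loaded
    using ADTypes: AutoReverseDiff

    f(x) = (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
    initial_x = zeros(2)

    # Same call as above, but gradients now come from reverse-mode AD
    Optim.minimizer(optimize(f, initial_x, BFGS(); autodiff = AutoReverseDiff()))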

docs/src/user/minimization.md

Lines changed: 3 additions & 2 deletions
@@ -26,9 +26,10 @@ If we pass `f` alone, Optim will construct an approximate gradient for us using
 ```jl
 optimize(f, x0, LBFGS())
 ```
-For better performance and greater precision, you can pass your own gradient function. If your objective is written in all Julia code with no special calls to external (that is non-Julia) libraries, you can also use automatic differentiation, by using the `autodiff` keyword and setting it to `:forward`:
+For better performance and greater precision, you can pass your own gradient function. If your objective is written in all Julia code with no special calls to external (that is non-Julia) libraries, you can also use automatic differentiation, by using the `autodiff` keyword and setting it to `ADTypes.AutoForwardDiff()`:
 ```julia
-optimize(f, x0, LBFGS(); autodiff = :forward)
+using ADTypes: AutoForwardDiff
+optimize(f, x0, LBFGS(); autodiff = AutoForwardDiff())
 ```
 
 For the Rosenbrock example, the analytical gradient can be shown to be:
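Note: the "pass your own gradient function" alternative mentioned in the changed paragraph is unaffected by this commit. A minimal illustrative sketch with a toy quadratic objective (not the Rosenbrock example from the docs):

    using Optim

    f(x) = sum(abs2, x .- 1)
    function g!(G, x)
        @. G = 2 * (x - 1)   # in-place gradient, as Optim expects
        return G
    end

    x0 = zeros(2)
    optimize(f, g!, x0, LBFGS())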

ext/OptimMOIExt.jl

Lines changed: 3 additions & 2 deletions
@@ -1,7 +1,8 @@
 module OptimMOIExt
 
 using Optim
-using Optim.LinearAlgebra: rmul!
+using Optim: ADTypes
+using Optim.LinearAlgebra: rmul!
 import MathOptInterface as MOI
 
 function __init__()
@@ -333,7 +334,7 @@ function MOI.optimize!(model::Optimizer{T}) where {T}
             inplace = true,
         )
     else
-        d = Optim.promote_objtype(method, initial_x, :finite, true, f, g!, h!)
+        d = Optim.promote_objtype(method, initial_x, ADTypes.AutoFiniteDiff(; fdtype = Val(:central)), true, f, g!, h!)
     end
     options = Optim.Options(; Optim.default_options(method)..., options...)
     if nl_constrained || has_bounds
        if nl_constrained
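Note: `ADTypes.AutoFiniteDiff(; fdtype = Val(:central))` replaces the old `:finite` symbol and selects central finite differences. A small illustrative helper (not part of the package; the step size is only a reasonable default) showing the quotient this corresponds to, componentwise:

    # Central-difference approximation of ∂f/∂xᵢ: (f(x + h eᵢ) - f(x - h eᵢ)) / 2h
    function central_diff(f, x; h = cbrt(eps(eltype(x))))
        n = length(x)
        return [(f(x .+ h .* (1:n .== i)) - f(x .- h .* (1:n .== i))) / (2h) for i in 1:n]
    end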

src/Manifolds.jl

Lines changed: 20 additions & 2 deletions
@@ -20,9 +20,9 @@ project_tangent(M::Manifold, x) = project_tangent!(M, similar(x), x)
 retract(M::Manifold, x) = retract!(M, copy(x))
 
 # Fake objective function implementing a retraction
-mutable struct ManifoldObjective{T<:NLSolversBase.AbstractObjective} <:
+mutable struct ManifoldObjective{M<:Manifold,T<:NLSolversBase.AbstractObjective} <:
                NLSolversBase.AbstractObjective
-    manifold::Manifold
+    manifold::M
     inner_obj::T
 end
 # TODO: is it safe here to call retract! and change x?
@@ -52,6 +52,20 @@ function NLSolversBase.value_gradient!(obj::ManifoldObjective, x)
     return value(obj.inner_obj)
 end
 
+# In general, we have to compute the gradient/Jacobian separately as it has to be projected
+function NLSolversBase.jvp!(obj::ManifoldObjective, x, v)
+    xin = retract(obj.manifold, x)
+    gradient!(obj.inner_obj, xin)
+    project_tangent!(obj.manifold, gradient(obj.inner_obj), xin)
+    return dot(gradient(obj.inner_obj), v)
+end
+function NLSolversBase.value_jvp!(obj::ManifoldObjective, x, v)
+    xin = retract(obj.manifold, x)
+    value_gradient!(obj.inner_obj, xin)
+    project_tangent!(obj.manifold, gradient(obj.inner_obj), xin)
+    return value(obj.inner_obj), dot(gradient(obj.inner_obj), v)
+end
+
 """Flat Euclidean space {R,C}^N, with projections equal to the identity."""
 struct Flat <: Manifold end
 # all the functions below are no-ops, and therefore the generated code
@@ -62,6 +76,10 @@ retract!(M::Flat, x) = x
 project_tangent(M::Flat, g, x) = g
 project_tangent!(M::Flat, g, x) = g
 
+# Optimizations for `Flat` manifold
+NLSolversBase.jvp!(obj::ManifoldObjective{Flat}, x, v) = jvp!(obj.inner_obj, x, v)
+NLSolversBase.value_jvp!(obj::ManifoldObjective{Flat}, x, v) = value_jvp!(obj.inner_obj, x, v)
+
 """Spherical manifold {|x| = 1}."""
 struct Sphere <: Manifold end
 retract!(S::Sphere, x) = (x ./= norm(x))
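Note: these methods exist so that line searches can query directional derivatives directly, which is the point of this commit. A minimal sketch, not the LineSearches.jl implementation, of how `value_jvp!` yields the univariate quantities a line search needs along a direction `s` (the helper name `phi_dphi` is hypothetical):

    using NLSolversBase: value_jvp!

    # ϕ(α) = f(x + α s) and ϕ'(α) = ⟨∇f(x + α s), s⟩ from a single call;
    # for a ManifoldObjective the gradient is projected before the dot product.
    function phi_dphi(obj, x, s, α)
        return value_jvp!(obj, x .+ α .* s, s)
    end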

src/Optim.jl

Lines changed: 11 additions & 0 deletions
@@ -16,6 +16,8 @@ documentation online at http://julianlsolvers.github.io/Optim.jl/stable/ .
 """
 module Optim
 
+import ADTypes
+
 using PositiveFactorizations: Positive # for globalization strategy in Newton
 
 using LineSearches: LineSearches # for globalization strategy in Quasi-Newton algs
@@ -41,6 +43,15 @@ using NLSolversBase:
     TwiceDifferentiableConstraints,
     nconstraints,
     nconstraints_x,
+    value,
+    value!,
+    value!!,
+    gradient,
+    gradient!,
+    value_gradient!,
+    value_gradient!!,
+    jvp!,
+    value_jvp!,
     hessian,
     hessian!,
     hessian!!,
