Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 3 additions & 7 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@ uuid = "429524aa-4258-5aef-a3af-852621145aeb"
version = "1.14.0"

[deps]
ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56"
FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
NLSolversBase = "d41bc354-129a-5804-8e4c-c37616107c6c"
NaNMath = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3"
PositiveFactorizations = "85a6dd25-e78a-55b7-8502-1745935b8125"
Expand Down Expand Up @@ -50,22 +49,19 @@ Test = "<0.0.1, 1.6"
julia = "1.10"

[extras]
ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b"
LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255"
MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
Measurements = "eff96d63-e80a-5855-80a2-b1b0885c5ab7"
NLSolversBase = "d41bc354-129a-5804-8e4c-c37616107c6c"
OptimTestProblems = "cec144fc-5a64-5bc6-99fb-dde8f63e154c"
PositiveFactorizations = "85a6dd25-e78a-55b7-8502-1745935b8125"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd"
ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test", "Aqua", "Distributions", "ExplicitImports", "JET", "MathOptInterface", "Measurements", "OptimTestProblems", "Random", "RecursiveArrayTools", "StableRNGs", "LineSearches", "NLSolversBase", "PositiveFactorizations", "ReverseDiff", "ADTypes"]
test = ["Test", "Aqua", "Distributions", "ExplicitImports", "ForwardDiff", "JET", "MathOptInterface", "Measurements", "OptimTestProblems", "Random", "RecursiveArrayTools", "StableRNGs", "ReverseDiff"]
1 change: 1 addition & 0 deletions docs/Project.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
[deps]
ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
Expand Down
2 changes: 1 addition & 1 deletion docs/src/examples/ipnewton_basics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ using Test #src
@test Optim.converged(res) #src
@test Optim.minimum(res) ≈ 0.25 #src

# Like the rest of Optim, you can also use `autodiff=:forward` and just pass in
# Like the rest of Optim, you can also use `autodiff=ADTypes.AutoForwardDiff()` and just pass in
# `fun`.

# If we only want to set lower bounds, use `ux = fill(Inf, 2)`
Expand Down
5 changes: 3 additions & 2 deletions docs/src/examples/maxlikenlm.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
using Optim, NLSolversBase
using LinearAlgebra: diag
using ForwardDiff
using ADTypes: AutoForwardDiff

#md # !!! tip
#md # Add Optim with the following command at the Julia command prompt:
Expand Down Expand Up @@ -152,7 +153,7 @@ end
func = TwiceDifferentiable(
vars -> Log_Likelihood(x, y, vars[1:nvar], vars[nvar+1]),
ones(nvar + 1);
autodiff = :forward,
autodiff = AutoForwardDiff(),
);

# The above statement accepts 4 inputs: the x matrix, the dependent
Expand All @@ -163,7 +164,7 @@ func = TwiceDifferentiable(
# the error variance.
#
# The `ones(nvar+1)` are the starting values for the parameters and
# the `autodiff=:forward` command performs forward mode automatic
# the `autodiff = AutoForwardDiff()` keyword argument selects forward mode automatic
# differentiation.
#
# The actual optimization of the likelihood function is accomplished
Expand Down
16 changes: 9 additions & 7 deletions docs/src/user/gradientsandhessians.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ Automatic differentiation techniques are a middle ground between finite differen

Reverse-mode automatic differentiation can be seen as an automatic implementation of the adjoint method mentioned above, and requires a runtime comparable to only one evaluation of ``f``. It is, however, considerably more complex to implement, since it requires recording the execution of the program in order to run it backwards, and it incurs a larger overhead.

Forward-mode automatic differentiation is supported through the [ForwardDiff.jl](https://github.com/JuliaDiff/ForwardDiff.jl) package by providing the `autodiff=:forward` keyword to `optimize`.
More generic automatic differentiation is supported thanks to [DifferentiationInterface.jl](https://github.com/JuliaDiff/DifferentiationInterface.jl), by setting `autodiff` to any compatible backend object from [ADTypes.jl](https://github.com/SciML/ADTypes.jl).
For instance, the user can choose `autodiff=AutoReverseDiff()`, `autodiff=AutoEnzyme()`, `autodiff=AutoMooncake()` or `autodiff=AutoZygote()` for a reverse-mode gradient computation, which is generally faster than forward mode on large inputs.
Each of these choices requires loading the corresponding package beforehand.
Generic automatic differentiation is supported thanks to [DifferentiationInterface.jl](https://github.com/JuliaDiff/DifferentiationInterface.jl), by setting `autodiff` to any compatible backend object from [ADTypes.jl](https://github.com/SciML/ADTypes.jl).
For instance, forward-mode automatic differentiation is supported through the [ForwardDiff.jl](https://github.com/JuliaDiff/ForwardDiff.jl) package by providing the `autodiff=ADTypes.AutoForwardDiff()` keyword to `optimize`.
Additionally, the user can choose `autodiff=AutoReverseDiff()`, `autodiff=AutoEnzyme()`, `autodiff=AutoMooncake()` or `autodiff=AutoZygote()` for a reverse-mode gradient computation, which is generally faster than forward mode on large inputs.
Each of these choices requires loading the `ADTypes` package and the corresponding automatic differentiation package (e.g., `ForwardDiff` or `ReverseDiff`) beforehand.

## Example

Expand Down Expand Up @@ -66,14 +66,16 @@ julia> Optim.minimizer(optimize(f, initial_x, BFGS()))
```
Still looks good. Returning to automatic differentiation, let us try both solvers using this
method. We enable [forward mode](https://github.com/JuliaDiff/ForwardDiff.jl) automatic
differentiation by using the `autodiff = :forward` keyword.
differentiation by using the `autodiff = AutoForwardDiff()` keyword.
```jlcon
julia> Optim.minimizer(optimize(f, initial_x, BFGS(); autodiff = :forward))
julia> using ADTypes: AutoForwardDiff

julia> Optim.minimizer(optimize(f, initial_x, BFGS(); autodiff = AutoForwardDiff()))
2-element Array{Float64,1}:
1.0
1.0

julia> Optim.minimizer(optimize(f, initial_x, Newton(); autodiff = :forward))
julia> Optim.minimizer(optimize(f, initial_x, Newton(); autodiff = AutoForwardDiff()))
2-element Array{Float64,1}:
1.0
1.0
Expand Down
5 changes: 3 additions & 2 deletions docs/src/user/minimization.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,10 @@ If we pass `f` alone, Optim will construct an approximate gradient for us using
```jl
optimize(f, x0, LBFGS())
```
For better performance and greater precision, you can pass your own gradient function. If your objective is written in all Julia code with no special calls to external (that is non-Julia) libraries, you can also use automatic differentiation, by using the `autodiff` keyword and setting it to `:forward`:
For better performance and greater precision, you can pass your own gradient function. If your objective is written in all Julia code with no special calls to external (that is non-Julia) libraries, you can also use automatic differentiation, by using the `autodiff` keyword and setting it to `AutoForwardDiff()`:
```julia
optimize(f, x0, LBFGS(); autodiff = :forward)
using ADTypes: AutoForwardDiff
optimize(f, x0, LBFGS(); autodiff = AutoForwardDiff())
```

For the Rosenbrock example, the analytical gradient can be shown to be:
Expand Down
2 changes: 1 addition & 1 deletion ext/OptimMOIExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ function MOI.optimize!(model::Optimizer{T}) where {T}
inplace = true,
)
else
d = Optim.promote_objtype(method, initial_x, :finite, true, f, g!, h!)
d = Optim.promote_objtype(method, initial_x, Optim.DEFAULT_AD_TYPE, true, f, g!, h!)
options = Optim.Options(; Optim.default_options(method)..., options...)
if nl_constrained || has_bounds
if nl_constrained
Expand Down
2 changes: 2 additions & 0 deletions src/Optim.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ using NLSolversBase:
# var for NelderMead
import StatsBase: var

import ADTypes

using LinearAlgebra:
LinearAlgebra,
Diagonal,
Expand Down
83 changes: 43 additions & 40 deletions src/multivariate/optimize/interface.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ fallback_method(f) = NelderMead()
fallback_method(f, g!) = LBFGS()
fallback_method(f, g!, h!) = Newton()

# By default, use the central finite-difference method
const DEFAULT_AD_TYPE = ADTypes.AutoFiniteDiff(; fdtype = Val(:central))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One could consider switching to a different default AD backend (maybe ForwardDiff for univariate optimization and e.g. Mooncake - if at least all tests pass - for multivariate optimization problems? - but the choice for when to switch to which backend is likely also problem/dimension dependent, see also https://docs.sciml.ai/Optimization/stable/API/ad/#ad). But I think it would be better to make such more fundamental changes in a separate PR.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree both with switching to reverse mode for multivariate problems and forward mode for univariate problems, and that this probably belongs in a separate PR


function fallback_method(f::InplaceObjective)
if !(f.fdf isa Nothing)
if !(f.hv isa Nothing)
Expand Down Expand Up @@ -36,48 +39,48 @@ fallback_method(d::OnceDifferentiable) = LBFGS()
fallback_method(d::TwiceDifferentiable) = Newton()

# promote the objective (tuple of callables or an AbstractObjective) according to method requirement
promote_objtype(method, initial_x, autodiff, inplace::Bool, args...) =
promote_objtype(method, initial_x, autodiff::ADTypes.AbstractADType, inplace::Bool, args...) =
error("No default objective type for $method and $args.")
# actual promotions, notice that (args...) captures FirstOrderOptimizer and NonDifferentiable, etc
promote_objtype(method::ZerothOrderOptimizer, x, autodiff, inplace::Bool, args...) =
promote_objtype(method::ZerothOrderOptimizer, x, autodiff::ADTypes.AbstractADType, inplace::Bool, args...) =
NonDifferentiable(args..., x, real(zero(eltype(x))))
promote_objtype(method::FirstOrderOptimizer, x, autodiff, inplace::Bool, f) =
promote_objtype(method::FirstOrderOptimizer, x, autodiff::ADTypes.AbstractADType, inplace::Bool, f) =
OnceDifferentiable(f, x, real(zero(eltype(x))); autodiff = autodiff)
promote_objtype(method::FirstOrderOptimizer, x, autodiff, inplace::Bool, args...) =
promote_objtype(method::FirstOrderOptimizer, x, autodiff::ADTypes.AbstractADType, inplace::Bool, args...) =
OnceDifferentiable(args..., x, real(zero(eltype(x))); inplace = inplace)
promote_objtype(method::FirstOrderOptimizer, x, autodiff, inplace::Bool, f, g, h) =
promote_objtype(method::FirstOrderOptimizer, x, autodiff::ADTypes.AbstractADType, inplace::Bool, f, g, h) =
OnceDifferentiable(f, g, x, real(zero(eltype(x))); inplace = inplace)
promote_objtype(method::SecondOrderOptimizer, x, autodiff, inplace::Bool, f) =
promote_objtype(method::SecondOrderOptimizer, x, autodiff::ADTypes.AbstractADType, inplace::Bool, f) =
TwiceDifferentiable(f, x, real(zero(eltype(x))); autodiff = autodiff)
promote_objtype(
method::SecondOrderOptimizer,
x,
autodiff,
autodiff::ADTypes.AbstractADType,
inplace::Bool,
f::NotInplaceObjective,
) = TwiceDifferentiable(f, x, real(zero(eltype(x))))
promote_objtype(
method::SecondOrderOptimizer,
x,
autodiff,
autodiff::ADTypes.AbstractADType,
inplace::Bool,
f::InplaceObjective,
) = TwiceDifferentiable(f, x, real(zero(eltype(x))))
promote_objtype(
method::SecondOrderOptimizer,
x,
autodiff,
autodiff::ADTypes.AbstractADType,
inplace::Bool,
f::NLSolversBase.InPlaceObjectiveFGHv,
) = TwiceDifferentiableHV(f, x)
promote_objtype(
method::SecondOrderOptimizer,
x,
autodiff,
autodiff::ADTypes.AbstractADType,
inplace::Bool,
f::NLSolversBase.InPlaceObjectiveFG_Hv,
) = TwiceDifferentiableHV(f, x)
promote_objtype(method::SecondOrderOptimizer, x, autodiff, inplace::Bool, f, g) =
promote_objtype(method::SecondOrderOptimizer, x, autodiff::ADTypes.AbstractADType, inplace::Bool, f, g) =
TwiceDifferentiable(
f,
g,
Expand All @@ -86,48 +89,48 @@ promote_objtype(method::SecondOrderOptimizer, x, autodiff, inplace::Bool, f, g)
inplace = inplace,
autodiff = autodiff,
)
promote_objtype(method::SecondOrderOptimizer, x, autodiff, inplace::Bool, f, g, h) =
promote_objtype(method::SecondOrderOptimizer, x, autodiff::ADTypes.AbstractADType, inplace::Bool, f, g, h) =
TwiceDifferentiable(f, g, h, x, real(zero(eltype(x))); inplace = inplace)
# no-op
promote_objtype(
method::ZerothOrderOptimizer,
x,
autodiff,
autodiff::ADTypes.AbstractADType,
inplace::Bool,
nd::NonDifferentiable,
) = nd
promote_objtype(
method::ZerothOrderOptimizer,
x,
autodiff,
autodiff::ADTypes.AbstractADType,
inplace::Bool,
od::OnceDifferentiable,
) = od
promote_objtype(
method::FirstOrderOptimizer,
x,
autodiff,
autodiff::ADTypes.AbstractADType,
inplace::Bool,
od::OnceDifferentiable,
) = od
promote_objtype(
method::ZerothOrderOptimizer,
x,
autodiff,
autodiff::ADTypes.AbstractADType,
inplace::Bool,
td::TwiceDifferentiable,
) = td
promote_objtype(
method::FirstOrderOptimizer,
x,
autodiff,
autodiff::ADTypes.AbstractADType,
inplace::Bool,
td::TwiceDifferentiable,
) = td
promote_objtype(
method::SecondOrderOptimizer,
x,
autodiff,
autodiff::ADTypes.AbstractADType,
inplace::Bool,
td::TwiceDifferentiable,
) = td
Expand All @@ -136,8 +139,8 @@ promote_objtype(
function optimize(
f,
initial_x::AbstractArray;
inplace = true,
autodiff = :finite,
inplace::Bool = true,
autodiff::ADTypes.AbstractADType = DEFAULT_AD_TYPE,
)
method = fallback_method(f)
d = promote_objtype(method, initial_x, autodiff, inplace, f)
Expand All @@ -149,8 +152,8 @@ function optimize(
f,
g,
initial_x::AbstractArray;
autodiff = :finite,
inplace = true,
autodiff::ADTypes.AbstractADType = DEFAULT_AD_TYPE,
inplace::Bool = true,
)

method = fallback_method(f, g)
Expand All @@ -165,8 +168,8 @@ function optimize(
g,
h,
initial_x::AbstractArray;
inplace = true,
autodiff = :finite
inplace::Bool = true,
autodiff::ADTypes.AbstractADType = DEFAULT_AD_TYPE,
)
method = fallback_method(f, g, h)
d = promote_objtype(method, initial_x, autodiff, inplace, f, g, h)
Expand All @@ -188,8 +191,8 @@ function optimize(
f,
initial_x::AbstractArray,
options::Options;
inplace = true,
autodiff = :finite,
inplace::Bool = true,
autodiff::ADTypes.AbstractADType = DEFAULT_AD_TYPE,
)
method = fallback_method(f)
d = promote_objtype(method, initial_x, autodiff, inplace, f)
Expand All @@ -200,8 +203,8 @@ function optimize(
g,
initial_x::AbstractArray,
options::Options;
inplace = true,
autodiff = :finite,
inplace::Bool = true,
autodiff::ADTypes.AbstractADType = DEFAULT_AD_TYPE,
)

method = fallback_method(f, g)
Expand All @@ -214,8 +217,8 @@ function optimize(
h,
initial_x::AbstractArray{T},
options::Options;
inplace = true,
autodiff = :finite,
inplace::Bool = true,
autodiff::ADTypes.AbstractADType = DEFAULT_AD_TYPE,
) where {T}
method = fallback_method(f, g, h)
d = promote_objtype(method, initial_x, autodiff, inplace, f, g, h)
Expand All @@ -229,8 +232,8 @@ function optimize(
initial_x::AbstractArray,
method::AbstractOptimizer,
options::Options = Options(; default_options(method)...);
inplace = true,
autodiff = :finite,
inplace::Bool = true,
autodiff::ADTypes.AbstractADType = DEFAULT_AD_TYPE,
)
d = promote_objtype(method, initial_x, autodiff, inplace, f)
optimize(d, initial_x, method, options)
Expand All @@ -241,8 +244,8 @@ function optimize(
initial_x::AbstractArray,
method::AbstractOptimizer,
options::Options = Options(; default_options(method)...);
inplace = true,
autodiff = :finite,
inplace::Bool = true,
autodiff::ADTypes.AbstractADType = DEFAULT_AD_TYPE,
)

d = promote_objtype(method, initial_x, autodiff, inplace, f)
Expand All @@ -254,8 +257,8 @@ function optimize(
initial_x::AbstractArray,
method::AbstractOptimizer,
options::Options = Options(; default_options(method)...);
inplace = true,
autodiff = :finite,
inplace::Bool = true,
autodiff::ADTypes.AbstractADType = DEFAULT_AD_TYPE,
)
d = promote_objtype(method, initial_x, autodiff, inplace, f, g)

Expand All @@ -268,8 +271,8 @@ function optimize(
initial_x::AbstractArray,
method::AbstractOptimizer,
options::Options = Options(; default_options(method)...);
inplace = true,
autodiff = :finite,
inplace::Bool = true,
autodiff::ADTypes.AbstractADType = DEFAULT_AD_TYPE,

)
d = promote_objtype(method, initial_x, autodiff, inplace, f, g, h)
Expand All @@ -282,8 +285,8 @@ function optimize(
initial_x::AbstractArray,
method::SecondOrderOptimizer,
options::Options = Options(; default_options(method)...);
inplace = true,
autodiff = :finite,
inplace::Bool = true,
autodiff::ADTypes.AbstractADType = DEFAULT_AD_TYPE,
) where {D<:Union{NonDifferentiable,OnceDifferentiable}}
d = promote_objtype(method, initial_x, autodiff, inplace, d)
optimize(d, initial_x, method, options)
Expand Down
Loading