diff --git a/Project.toml b/Project.toml index ab0bfb2..e8e7aab 100644 --- a/Project.toml +++ b/Project.toml @@ -1,16 +1,16 @@ name = "ProximalAlgorithms" uuid = "140ffc9f-1907-541a-a177-7475e0a401e9" -version = "0.5.5" +version = "0.6.0" [deps] +AbstractDifferentiation = "c29ec348-61ec-40c8-8164-b8c60e9d9f3d" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" ProximalCore = "dc4f5ac2-75d1-4f31-931e-60435d74994b" -Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] +AbstractDifferentiation = "0.6" LinearAlgebra = "1.2" Printf = "1.2" ProximalCore = "0.1" -Zygote = "0.6" julia = "1.2" diff --git a/README.md b/README.md index 15129be..525ed7b 100644 --- a/README.md +++ b/README.md @@ -11,14 +11,21 @@ A Julia package for non-smooth optimization algorithms. This package provides algorithms for the minimization of objective functions that include non-smooth terms, such as constraints or non-differentiable penalties. Implemented algorithms include: -* (Fast) Proximal gradient methods -* Douglas-Rachford splitting -* Three-term splitting -* Primal-dual splitting algorithms -* Newton-type methods - -This package works well in combination with [ProximalOperators](https://github.com/JuliaFirstOrder/ProximalOperators.jl) (>= 0.15), -which contains a wide range of functions that can be used to express cost terms. +- (Fast) Proximal gradient methods +- Douglas-Rachford splitting +- Three-term splitting +- Primal-dual splitting algorithms +- Newton-type methods + +Check out [this section](https://juliafirstorder.github.io/ProximalAlgorithms.jl/stable/guide/implemented_algorithms/) for an overview of the available algorithms. + +Algorithms rely on: +- [AbstractDifferentiation.jl](https://github.com/JuliaDiff/AbstractDifferentiation.jl) for automatic differentiation +(but you can easily bring your own gradients) +- the [ProximalCore API](https://github.com/JuliaFirstOrder/ProximalCore.jl) for proximal mappings, projections, etc, +to handle non-differentiable terms +(see for example [ProximalOperators](https://github.com/JuliaFirstOrder/ProximalOperators.jl) +for an extensive collection of functions). 
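How the two pieces fit together in practice, as a minimal sketch (the problem data and regularization weight here are made up for illustration; the solver call mirrors the tests updated later in this diff):

```julia
using LinearAlgebra
using Zygote
using AbstractDifferentiation: ZygoteBackend
using ProximalOperators: NormL1
using ProximalAlgorithms

A = [1.0 -2.0 3.0; 2.0 -1.0 0.0]
b = [1.0, 2.0]

# smooth term: least-squares cost, differentiated through the Zygote backend
f = ProximalAlgorithms.AutoDifferentiable(x -> norm(A * x - b)^2 / 2, ZygoteBackend())
# non-smooth term: L1 penalty, whose proximal mapping comes from ProximalOperators
g = NormL1(0.1)

solver = ProximalAlgorithms.FastForwardBackward(tol = 1e-5)
solution, iterations = solver(x0 = zeros(3), f = f, g = g)
```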
## Documentation diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index a63736d..74a3dff 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -8,6 +8,22 @@ using FileIO const SUITE = BenchmarkGroup() +function ProximalAlgorithms.value_and_gradient_closure(f::ProximalOperators.LeastSquaresDirect, x) + res = f.A*x - f.b + norm(res)^2, () -> f.A'*res +end + +struct SquaredDistance{Tb} + b::Tb +end + +(f::SquaredDistance)(x) = norm(x - f.b)^2 + +function ProximalAlgorithms.value_and_gradient_closure(f::SquaredDistance, x) + diff = x - f.b + norm(diff)^2, () -> diff +end + for (benchmark_name, file_name) in [ ("Lasso tiny", joinpath(@__DIR__, "data", "lasso_tiny.jld2")), ("Lasso small", joinpath(@__DIR__, "data", "lasso_small.jld2")), @@ -42,21 +58,21 @@ for (benchmark_name, file_name) in [ SUITE[k]["ZeroFPR"] = @benchmarkable solver(x0=x0, f=f, A=$A, g=g) setup=begin solver = ProximalAlgorithms.ZeroFPR(tol=1e-6) x0 = zeros($T, size($A, 2)) - f = Translate(SqrNormL2(), -$b) + f = SquaredDistance($b) g = NormL1($lam) end SUITE[k]["PANOC"] = @benchmarkable solver(x0=x0, f=f, A=$A, g=g) setup=begin solver = ProximalAlgorithms.PANOC(tol=1e-6) x0 = zeros($T, size($A, 2)) - f = Translate(SqrNormL2(), -$b) + f = SquaredDistance($b) g = NormL1($lam) end SUITE[k]["PANOCplus"] = @benchmarkable solver(x0=x0, f=f, A=$A, g=g) setup=begin solver = ProximalAlgorithms.PANOCplus(tol=1e-6) x0 = zeros($T, size($A, 2)) - f = Translate(SqrNormL2(), -$b) + f = SquaredDistance($b) g = NormL1($lam) end diff --git a/docs/Project.toml b/docs/Project.toml index 708dd2b..f38368c 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,4 +1,5 @@ [deps] +AbstractDifferentiation = "c29ec348-61ec-40c8-8164-b8c60e9d9f3d" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" @@ -7,6 +8,7 @@ Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" ProximalAlgorithms = "140ffc9f-1907-541a-a177-7475e0a401e9" ProximalCore = "dc4f5ac2-75d1-4f31-931e-60435d74994b" ProximalOperators = "a725b495-10eb-56fe-b38b-717eba820537" +Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] Documenter = "1" diff --git a/docs/src/examples/sparse_linear_regression.jl b/docs/src/examples/sparse_linear_regression.jl index 9664e3b..3552a6d 100644 --- a/docs/src/examples/sparse_linear_regression.jl +++ b/docs/src/examples/sparse_linear_regression.jl @@ -51,10 +51,13 @@ end mean_squared_error(label, output) = mean((output .- label) .^ 2) / 2 +using Zygote +using AbstractDifferentiation: ZygoteBackend using ProximalAlgorithms -training_loss = ProximalAlgorithms.ZygoteFunction( - wb -> mean_squared_error(training_label, standardized_linear_model(wb, training_input)) +training_loss = ProximalAlgorithms.AutoDifferentiable( + wb -> mean_squared_error(training_label, standardized_linear_model(wb, training_input)), + ZygoteBackend() ) # As regularization we will use the L1 norm, implemented in [ProximalOperators](https://github.com/JuliaFirstOrder/ProximalOperators.jl): diff --git a/docs/src/guide/custom_objectives.jl b/docs/src/guide/custom_objectives.jl index 56fc3c7..5189ddb 100644 --- a/docs/src/guide/custom_objectives.jl +++ b/docs/src/guide/custom_objectives.jl @@ -12,29 +12,32 @@ # # Defining the proximal mapping for a custom function type requires adding a method for [`ProximalCore.prox!`](@ref). 
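For instance, a hand-rolled indicator of the Euclidean unit ball might look as follows (a sketch with hypothetical names, in the spirit of the constraint used later in this guide; `prox!` writes the result into `y` and returns the function value at that point):

```julia
using LinearAlgebra
using ProximalCore

# indicator of the Euclidean unit ball: zero inside the ball, +Inf outside
struct IndUnitBall end

(::IndUnitBall)(x) = norm(x) <= 1 ? zero(real(eltype(x))) : real(eltype(x))(Inf)

function ProximalCore.prox!(y, ::IndUnitBall, x, gamma)
    # the prox of an indicator is the projection; gamma plays no role here
    y .= norm(x) > 1 ? x ./ norm(x) : x
    # return the value of the function at the projected point (always zero)
    return zero(real(eltype(x)))
end
```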
# -# To compute gradients, ProximalAlgorithms provides a fallback definition for [`ProximalCore.gradient!`](@ref), -# relying on [Zygote](https://github.com/FluxML/Zygote.jl) to use automatic differentiation. -# Therefore, you can provide any (differentiable) Julia function wherever gradients need to be taken, -# and everything will work out of the box. +# To compute gradients, algorithms use [`ProximalAlgorithms.value_and_gradient_closure`](@ref): +# this relies on [AbstractDifferentiation](https://github.com/JuliaDiff/AbstractDifferentiation.jl), for automatic differentiation +# with any of its supported backends, when functions are wrapped in [`ProximalAlgorithms.AutoDifferentiable`](@ref), +# as the examples below show. # -# If however one would like to provide their own gradient implementation (e.g. for efficiency reasons), -# they can simply implement a method for [`ProximalCore.gradient!`](@ref). +# If however you would like to provide your own gradient implementation (e.g. for efficiency reasons), +# you can simply implement a method for [`ProximalAlgorithms.value_and_gradient_closure`](@ref) on your own function type. # # ```@docs # ProximalCore.prox # ProximalCore.prox! -# ProximalCore.gradient -# ProximalCore.gradient! +# ProximalAlgorithms.value_and_gradient_closure +# ProximalAlgorithms.AutoDifferentiable # ``` # # ## Example: constrained Rosenbrock # # Let's try to minimize the celebrated Rosenbrock function, but constrained to the unit norm ball. The cost function is +using Zygote +using AbstractDifferentiation: ZygoteBackend using ProximalAlgorithms -rosenbrock2D = ProximalAlgorithms.ZygoteFunction( - x -> 100 * (x[2] - x[1]^2)^2 + (1 - x[1])^2 +rosenbrock2D = ProximalAlgorithms.AutoDifferentiable( + x -> 100 * (x[2] - x[1]^2)^2 + (1 - x[1])^2, + ZygoteBackend() ) # To enforce the constraint, we define the indicator of the unit ball, together with its proximal mapping: @@ -82,17 +85,23 @@ scatter!([solution[1]], [solution[2]], color=:red, markershape=:star5, label="co mutable struct Counting{T} f::T + eval_count::Int gradient_count::Int prox_count::Int end -Counting(f::T) where T = Counting{T}(f, 0, 0) +Counting(f::T) where T = Counting{T}(f, 0, 0, 0) -# Now we only need to intercept any call to `gradient!` and `prox!` and increase counters there: +# Now we only need to intercept any call to `value_and_gradient_closure` and `prox!` and increase counters there: -function ProximalCore.gradient!(y, f::Counting, x) - f.gradient_count += 1 - return ProximalCore.gradient!(y, f.f, x) +function ProximalAlgorithms.value_and_gradient_closure(f::Counting, x) + f.eval_count += 1 + fx, pb = ProximalAlgorithms.value_and_gradient_closure(f.f, x) + function counting_pullback() + f.gradient_count += 1 + return pb() + end + return fx, counting_pullback end function ProximalCore.prox!(y, f::Counting, x, gamma) @@ -109,5 +118,6 @@ solution, iterations = panoc(x0=-ones(2), f=f, g=g) # and check how many operations where actually performed: -println(f.gradient_count) -println(g.prox_count) +println("function evals: $(f.eval_count)") +println("gradient evals: $(f.gradient_count)") +println(" prox evals: $(g.prox_count)") diff --git a/docs/src/guide/getting_started.jl b/docs/src/guide/getting_started.jl index 228df29..1333bf4 100644 --- a/docs/src/guide/getting_started.jl +++ b/docs/src/guide/getting_started.jl @@ -20,7 +20,7 @@ # The literature on proximal operators and algorithms is vast: for an overview, one can refer to [Parikh2014](@cite), [Beck2017](@cite). 
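Bringing your own gradient, as described in the custom-objectives guide above, comes down to a single method definition. A sketch (the `MyQuadratic` type is hypothetical, analogous to the `Quadratic` helper defined in `test/runtests.jl` further down):

```julia
using LinearAlgebra
using ProximalAlgorithms

# a smooth cost with a hand-written gradient, bypassing automatic differentiation
struct MyQuadratic{M,V}
    Q::M   # assumed symmetric
    q::V
end

(f::MyQuadratic)(x) = dot(x, f.Q * x) / 2 + dot(f.q, x)

function ProximalAlgorithms.value_and_gradient_closure(f::MyQuadratic, x)
    Qx = f.Q * x
    # the value is computed eagerly, the gradient only if the closure gets called
    return dot(x, Qx) / 2 + dot(f.q, x), () -> Qx .+ f.q
end
```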
# # To evaluate these first-order primitives, in ProximalAlgorithms: -# * ``\nabla f_i`` falls back to using automatic differentiation (as provided by [Zygote](https://github.com/FluxML/Zygote.jl)). +# * ``\nabla f_i`` falls back to using automatic differentiation (as provided by [AbstractDifferentiation](https://github.com/JuliaDiff/AbstractDifferentiation.jl) and all of its backends). # * ``\operatorname{prox}_{f_i}`` relies on the intereface of [ProximalOperators](https://github.com/JuliaFirstOrder/ProximalOperators.jl) (>= 0.15). # Both of the above can be implemented for custom function types, as [documented here](@ref custom_terms). # @@ -51,11 +51,14 @@ # which we will solve using the fast proximal gradient method (also known as fast forward-backward splitting): using LinearAlgebra +using Zygote +using AbstractDifferentiation: ZygoteBackend using ProximalOperators using ProximalAlgorithms -quadratic_cost = ProximalAlgorithms.ZygoteFunction( - x -> dot([3.4 1.2; 1.2 4.5] * x, x) / 2 + dot([-2.3, 9.9], x) +quadratic_cost = ProximalAlgorithms.AutoDifferentiable( + x -> dot([3.4 1.2; 1.2 4.5] * x, x) / 2 + dot([-2.3, 9.9], x), + ZygoteBackend() ) box_indicator = ProximalOperators.IndBox(0, 1) @@ -69,8 +72,10 @@ ffb = ProximalAlgorithms.FastForwardBackward(maxit=1000, tol=1e-5, verbose=true) solution, iterations = ffb(x0=ones(2), f=quadratic_cost, g=box_indicator) # We can verify the correctness of the solution by checking that the negative gradient is orthogonal to the constraints, pointing outwards: +# for this, we just evaluate the closure `cl` returned as second output of [`value_and_gradient_closure`](@ref). --ProximalAlgorithms.gradient(quadratic_cost, solution)[1] +v, cl = ProximalAlgorithms.value_and_gradient_closure(quadratic_cost, solution) +-cl() # Or by plotting the solution against the cost function and constraint: diff --git a/docs/src/index.md b/docs/src/index.md index 87b95ef..b1a119f 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -5,16 +5,21 @@ A Julia package for non-smooth optimization algorithms. [Link to GitHub reposito This package provides algorithms for the minimization of objective functions that include non-smooth terms, such as constraints or non-differentiable penalties. Implemented algorithms include: -* (Fast) Proximal gradient methods -* Douglas-Rachford splitting -* Three-term splitting -* Primal-dual splitting algorithms -* Newton-type methods +- (Fast) Proximal gradient methods +- Douglas-Rachford splitting +- Three-term splitting +- Primal-dual splitting algorithms +- Newton-type methods Check out [this section](@ref problems_algorithms) for an overview of the available algorithms. -This package works well in combination with [ProximalOperators](https://github.com/JuliaFirstOrder/ProximalOperators.jl) (>= 0.15), -which contains a wide range of functions that can be used to express cost terms. +Algorithms rely on: +- [AbstractDifferentiation.jl](https://github.com/JuliaDiff/AbstractDifferentiation.jl) for automatic differentiation +(but you can easily bring your own gradients) +- the [ProximalCore API](https://github.com/JuliaFirstOrder/ProximalCore.jl) for proximal mappings, projections, etc, +to handle non-differentiable terms +(see for example [ProximalOperators](https://github.com/JuliaFirstOrder/ProximalOperators.jl) +for an extensive collection of functions). !!! note @@ -23,20 +28,11 @@ which contains a wide range of functions that can be used to express cost terms. 
## Installation -Install the latest stable release with - ```julia julia> ] pkg> add ProximalAlgorithms ``` -To install the development version instead (`master` branch), do - -```julia -julia> ] -pkg> add ProximalAlgorithms#master -``` - ## Citing If you use any of the algorithms from ProximalAlgorithms in your research, you are kindly asked to cite the relevant bibliography. @@ -45,3 +41,4 @@ Please check [this section of the manual](@ref problems_algorithms) for algorith ## Contributing Contributions are welcome in the form of [issue notifications](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl/issues) or [pull requests](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl/pulls). When contributing new algorithms, we highly recommend looking at already implemented ones to get inspiration on how to structure the code. + diff --git a/justfile b/justfile new file mode 100644 index 0000000..5dc42fa --- /dev/null +++ b/justfile @@ -0,0 +1,19 @@ +julia: + julia --project=. + +instantiate: + julia --project=. -e 'using Pkg; Pkg.instantiate()' + +test: + julia --project=. -e 'using Pkg; Pkg.test()' + +format: + julia --project=. -e 'using JuliaFormatter: format; format(".")' + +docs: + julia --project=./docs docs/make.jl + +benchmark: + julia --project=benchmark -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' + julia --project=benchmark benchmark/runbenchmarks.jl + diff --git a/src/ProximalAlgorithms.jl b/src/ProximalAlgorithms.jl index 743c0ea..2494da6 100644 --- a/src/ProximalAlgorithms.jl +++ b/src/ProximalAlgorithms.jl @@ -1,14 +1,48 @@ module ProximalAlgorithms +using AbstractDifferentiation using ProximalCore -using ProximalCore: prox, prox!, gradient, gradient! +using ProximalCore: prox, prox! const RealOrComplex{R} = Union{R,Complex{R}} const Maybe{T} = Union{T,Nothing} +""" + AutoDifferentiable(f, backend) + +Callable struct wrapping function `f` to be auto-differentiated using `backend`. + +When called, it evaluates the same as `f`, while [`ProximalAlgorithms.value_and_gradient_closure`](@ref) +is implemented using `backend` for automatic differentiation. +The backend can be any from [AbstractDifferentiation](https://github.com/JuliaDiff/AbstractDifferentiation.jl). +""" +struct AutoDifferentiable{F, B} + f::F + backend::B +end + +(f::AutoDifferentiable)(x) = f.f(x) + +""" + value_and_gradient_closure(f, x) + +Return a tuple containing the value of `f` at `x`, and a closure `cl`. + +Function `cl`, once called, yields the gradient of `f` at `x`. 
+""" +value_and_gradient_closure + +function value_and_gradient_closure(f::AutoDifferentiable, x) + fx, pb = AbstractDifferentiation.value_and_pullback_function(f.backend, f.f, x) + return fx, () -> pb(one(fx))[1] +end + +function value_and_gradient_closure(f::ProximalCore.Zero, x) + f(x), () -> zero(x) +end + # various utilities -include("utilities/ad.jl") include("utilities/fb_tools.jl") include("utilities/iteration_tools.jl") diff --git a/src/algorithms/davis_yin.jl b/src/algorithms/davis_yin.jl index 93ad084..01b362b 100644 --- a/src/algorithms/davis_yin.jl +++ b/src/algorithms/davis_yin.jl @@ -55,7 +55,8 @@ end function Base.iterate(iter::DavisYinIteration) z = copy(iter.x0) xg, = prox(iter.g, z, iter.gamma) - grad_f_xg, = gradient(iter.f, xg) + f_xg, cl = value_and_gradient_closure(iter.f, xg) + grad_f_xg = cl() z_half = 2 .* xg .- z .- iter.gamma .* grad_f_xg xh, = prox(iter.h, z_half, iter.gamma) res = xh - xg @@ -66,7 +67,8 @@ end function Base.iterate(iter::DavisYinIteration, state::DavisYinState) prox!(state.xg, iter.g, state.z, iter.gamma) - gradient!(state.grad_f_xg, iter.f, state.xg) + f_xg, cl = value_and_gradient_closure(iter.f, state.xg) + state.grad_f_xg .= cl() state.z_half .= 2 .* state.xg .- state.z .- iter.gamma .* state.grad_f_xg prox!(state.xh, iter.h, state.z_half, iter.gamma) state.res .= state.xh .- state.xg diff --git a/src/algorithms/fast_forward_backward.jl b/src/algorithms/fast_forward_backward.jl index b797478..916511c 100644 --- a/src/algorithms/fast_forward_backward.jl +++ b/src/algorithms/fast_forward_backward.jl @@ -68,7 +68,8 @@ end function Base.iterate(iter::FastForwardBackwardIteration) x = copy(iter.x0) - grad_f_x, f_x = gradient(iter.f, x) + f_x, cl = value_and_gradient_closure(iter.f, x) + grad_f_x = cl() gamma = iter.gamma === nothing ? 1 / lower_bound_smoothness_constant(iter.f, I, x, grad_f_x) : iter.gamma y = x - gamma .* grad_f_x z, g_z = prox(iter.g, y, gamma) @@ -103,7 +104,8 @@ function Base.iterate(iter::FastForwardBackwardIteration{R}, state::FastForwardB state.x .= state.z .+ beta .* (state.z .- state.z_prev) state.z_prev, state.z = state.z, state.z_prev - state.f_x = gradient!(state.grad_f_x, iter.f, state.x) + state.f_x, cl = value_and_gradient_closure(iter.f, state.x) + state.grad_f_x .= cl() state.y .= state.x .- state.gamma .* state.grad_f_x state.g_z = prox!(state.z, iter.g, state.y, state.gamma) state.res .= state.x .- state.z diff --git a/src/algorithms/forward_backward.jl b/src/algorithms/forward_backward.jl index 6d7f43b..15b6040 100644 --- a/src/algorithms/forward_backward.jl +++ b/src/algorithms/forward_backward.jl @@ -59,7 +59,8 @@ end function Base.iterate(iter::ForwardBackwardIteration) x = copy(iter.x0) - grad_f_x, f_x = gradient(iter.f, x) + f_x, cl = value_and_gradient_closure(iter.f, x) + grad_f_x = cl() gamma = iter.gamma === nothing ? 
1 / lower_bound_smoothness_constant(iter.f, I, x, grad_f_x) : iter.gamma y = x - gamma .* grad_f_x z, g_z = prox(iter.g, y, gamma) @@ -81,7 +82,8 @@ function Base.iterate(iter::ForwardBackwardIteration{R}, state::ForwardBackwardS state.grad_f_x, state.grad_f_z = state.grad_f_z, state.grad_f_x else state.x, state.z = state.z, state.x - state.f_x = gradient!(state.grad_f_x, iter.f, state.x) + state.f_x, cl = value_and_gradient_closure(iter.f, state.x) + state.grad_f_x .= cl() end state.y .= state.x .- state.gamma .* state.grad_f_x diff --git a/src/algorithms/li_lin.jl b/src/algorithms/li_lin.jl index cb572a8..fc7e367 100644 --- a/src/algorithms/li_lin.jl +++ b/src/algorithms/li_lin.jl @@ -62,7 +62,8 @@ end function Base.iterate(iter::LiLinIteration{R}) where {R} y = copy(iter.x0) - grad_f_y, f_y = gradient(iter.f, y) + f_y, cl = value_and_gradient_closure(iter.f, y) + grad_f_y = cl() # TODO: initialize gamma if not provided # TODO: authors suggest Barzilai-Borwein rule? @@ -102,7 +103,8 @@ function Base.iterate( else # TODO: re-use available space in state? # TODO: backtrack gamma at x - grad_f_x, f_x = gradient(iter.f, x) + f_x, cl = value_and_gradient_closure(iter.f, x) + grad_f_x = cl() x_forward = state.x - state.gamma .* grad_f_x v, g_v = prox(iter.g, x_forward, state.gamma) Fv = iter.f(v) + g_v @@ -121,7 +123,8 @@ function Base.iterate( Fx = Fv end - state.f_y = gradient!(state.grad_f_y, iter.f, state.y) + state.f_y, cl = value_and_gradient_closure(iter.f, state.y) + state.grad_f_y .= cl() state.y_forward .= state.y .- state.gamma .* state.grad_f_y state.g_z = prox!(state.z, iter.g, state.y_forward, state.gamma) diff --git a/src/algorithms/panoc.jl b/src/algorithms/panoc.jl index 5f38dcb..72414ff 100644 --- a/src/algorithms/panoc.jl +++ b/src/algorithms/panoc.jl @@ -86,7 +86,8 @@ f_model(iter::PANOCIteration, state::PANOCState) = f_model(state.f_Ax, state.At_ function Base.iterate(iter::PANOCIteration{R}) where R x = copy(iter.x0) Ax = iter.A * x - grad_f_Ax, f_Ax = gradient(iter.f, Ax) + f_Ax, cl = value_and_gradient_closure(iter.f, Ax) + grad_f_Ax = cl() gamma = iter.gamma === nothing ? 
iter.alpha / lower_bound_smoothness_constant(iter.f, iter.A, x, grad_f_Ax) : iter.gamma At_grad_f_Ax = iter.A' * grad_f_Ax y = x - gamma .* At_grad_f_Ax @@ -152,7 +153,8 @@ function Base.iterate(iter::PANOCIteration{R, Tx, Tf}, state::PANOCState) where state.x_d .= state.x .+ state.d state.Ax_d .= state.Ax .+ state.Ad - state.f_Ax_d = gradient!(state.grad_f_Ax_d, iter.f, state.Ax_d) + state.f_Ax_d, cl = value_and_gradient_closure(iter.f, state.Ax_d) + state.grad_f_Ax_d .= cl() mul!(state.At_grad_f_Ax_d, adjoint(iter.A), state.grad_f_Ax_d) copyto!(state.x, state.x_d) @@ -189,7 +191,8 @@ function Base.iterate(iter::PANOCIteration{R, Tx, Tf}, state::PANOCState) where # along a line using interpolation and linear combinations # this allows saving operations if isinf(f_Az) - f_Az = gradient!(state.grad_f_Az, iter.f, state.Az) + f_Az, cl = value_and_gradient_closure(iter.f, state.Az) + state.grad_f_Az .= cl() end if isinf(c) mul!(state.At_grad_f_Az, iter.A', state.grad_f_Az) @@ -203,7 +206,8 @@ function Base.iterate(iter::PANOCIteration{R, Tx, Tf}, state::PANOCState) where else # otherwise, in the general case where f is only smooth, we compute # one gradient and matvec per backtracking step - state.f_Ax = gradient!(state.grad_f_Ax, iter.f, state.Ax) + state.f_Ax, cl = value_and_gradient_closure(iter.f, state.Ax) + state.grad_f_Ax .= cl() mul!(state.At_grad_f_Ax, adjoint(iter.A), state.grad_f_Ax) end diff --git a/src/algorithms/panocplus.jl b/src/algorithms/panocplus.jl index cc9e994..2b33e4d 100644 --- a/src/algorithms/panocplus.jl +++ b/src/algorithms/panocplus.jl @@ -79,7 +79,8 @@ f_model(iter::PANOCplusIteration, state::PANOCplusState) = f_model(state.f_Ax, s function Base.iterate(iter::PANOCplusIteration{R}) where {R} x = copy(iter.x0) Ax = iter.A * x - grad_f_Ax, f_Ax = gradient(iter.f, Ax) + f_Ax, cl = value_and_gradient_closure(iter.f, Ax) + grad_f_Ax = cl() gamma = iter.gamma === nothing ? 
iter.alpha / lower_bound_smoothness_constant(iter.f, iter.A, x, grad_f_Ax) : iter.gamma At_grad_f_Ax = iter.A' * grad_f_Ax y = x - gamma .* At_grad_f_Ax @@ -97,7 +98,8 @@ function Base.iterate(iter::PANOCplusIteration{R}) where {R} ) else mul!(state.Az, iter.A, state.z) - gradient!(state.grad_f_Az, iter.f, state.Az) + f_Az, cl = value_and_gradient_closure(iter.f, state.Az) + state.grad_f_Az = cl() end mul!(state.At_grad_f_Az, adjoint(iter.A), state.grad_f_Az) return state, state @@ -152,7 +154,8 @@ function Base.iterate(iter::PANOCplusIteration{R}, state::PANOCplusState) where end mul!(state.Ax, iter.A, state.x) - state.f_Ax = gradient!(state.grad_f_Ax, iter.f, state.Ax) + state.f_Ax, cl = value_and_gradient_closure(iter.f, state.Ax) + state.grad_f_Ax .= cl() mul!(state.At_grad_f_Ax, adjoint(iter.A), state.grad_f_Ax) state.y .= state.x .- state.gamma .* state.At_grad_f_Ax @@ -162,7 +165,8 @@ function Base.iterate(iter::PANOCplusIteration{R}, state::PANOCplusState) where f_Az_upp = f_model(iter, state) mul!(state.Az, iter.A, state.z) - f_Az = gradient!(state.grad_f_Az, iter.f, state.Az) + f_Az, cl = value_and_gradient_closure(iter.f, state.Az) + state.grad_f_Az .= cl() if (iter.gamma === nothing || iter.adaptive == true) tol = 10 * eps(R) * (1 + abs(f_Az)) if f_Az > f_Az_upp + tol && state.gamma >= iter.minimum_gamma diff --git a/src/algorithms/primal_dual.jl b/src/algorithms/primal_dual.jl index 1ac83ba..d9e9074 100644 --- a/src/algorithms/primal_dual.jl +++ b/src/algorithms/primal_dual.jl @@ -167,7 +167,8 @@ end function Base.iterate(iter::AFBAIteration, state::AFBAState = AFBAState(x=copy(iter.x0), y=copy(iter.y0))) # perform xbar-update step - gradient!(state.gradf, iter.f, state.x) + f_x, cl = value_and_gradient_closure(iter.f, state.x) + state.gradf .= cl() mul!(state.temp_x, iter.L', state.y) state.temp_x .+= state.gradf state.temp_x .*= -iter.gamma[1] @@ -175,7 +176,8 @@ function Base.iterate(iter::AFBAIteration, state::AFBAState = AFBAState(x=copy(i prox!(state.xbar, iter.g, state.temp_x, iter.gamma[1]) # perform ybar-update step - gradient!(state.gradl, convex_conjugate(iter.l), state.y) + lc_y, cl = value_and_gradient_closure(convex_conjugate(iter.l), state.y) + state.gradl .= cl() state.temp_x .= iter.theta .* state.xbar .+ (1 - iter.theta) .* state.x mul!(state.temp_y, iter.L, state.temp_x) state.temp_y .-= state.gradl diff --git a/src/algorithms/sfista.jl b/src/algorithms/sfista.jl index 954309d..c66e1a8 100644 --- a/src/algorithms/sfista.jl +++ b/src/algorithms/sfista.jl @@ -71,7 +71,8 @@ function Base.iterate( state.a = (state.τ + sqrt(state.τ ^ 2 + 4 * state.τ * state.APrev)) / 2 state.A = state.APrev + state.a state.xt .= (state.APrev / state.A) .* state.yPrev + (state.a / state.A) .* state.xPrev - gradient!(state.gradf_xt, iter.f, state.xt) + f_xt, cl = value_and_gradient_closure(iter.f, state.xt) + state.gradf_xt .= cl() λ2 = state.λ / (1 + state.λ * iter.mf) # FISTA acceleration steps. prox!(state.y, iter.g, state.xt - λ2 * state.gradf_xt, λ2) @@ -93,7 +94,8 @@ function check_sc(state::SFISTAState, iter::SFISTAIteration, tol, termination_ty else # Classic (approximate) first-order stationary point [4]. The main inclusion is: r ∈ ∇f(y) + ∂h(y). 
λ2 = state.λ / (1 + state.λ * iter.mf) - gradf_y, = gradient(iter.f, state.y) + f_y, cl = value_and_gradient_closure(iter.f, state.y) + gradf_y = cl() r = gradf_y - state.gradf_xt + (state.xt - state.y) / λ2 res = norm(r) end diff --git a/src/algorithms/zerofpr.jl b/src/algorithms/zerofpr.jl index fee5ea5..6008cc4 100644 --- a/src/algorithms/zerofpr.jl +++ b/src/algorithms/zerofpr.jl @@ -84,7 +84,8 @@ f_model(iter::ZeroFPRIteration, state::ZeroFPRState) = f_model(state.f_Ax, state function Base.iterate(iter::ZeroFPRIteration{R}) where R x = copy(iter.x0) Ax = iter.A * x - grad_f_Ax, f_Ax = gradient(iter.f, Ax) + f_Ax, cl = value_and_gradient_closure(iter.f, Ax) + grad_f_Ax = cl() gamma = iter.gamma === nothing ? iter.alpha / lower_bound_smoothness_constant(iter.f, iter.A, x, grad_f_Ax) : iter.gamma At_grad_f_Ax = iter.A' * grad_f_Ax y = x - gamma .* At_grad_f_Ax @@ -130,7 +131,9 @@ function Base.iterate(iter::ZeroFPRIteration{R}, state::ZeroFPRState) where R f_Axbar_upp, f_Axbar else mul!(state.Axbar, iter.A, state.xbar) - f_model(iter, state), gradient!(state.grad_f_Axbar, iter.f, state.Axbar) + f_Axbar, cl = value_and_gradient_closure(iter.f, state.Axbar) + state.grad_f_Axbar .= cl() + f_model(iter, state), f_Axbar end # compute FBE @@ -164,7 +167,8 @@ function Base.iterate(iter::ZeroFPRIteration{R}, state::ZeroFPRState) where R state.x .= state.xbar_prev .+ state.tau .* state.d state.Ax .= state.Axbar .+ state.tau .* state.Ad # TODO: can precompute most of next line in case f is quadratic - state.f_Ax = gradient!(state.grad_f_Ax, iter.f, state.Ax) + state.f_Ax, cl = value_and_gradient_closure(iter.f, state.Ax) + state.grad_f_Ax .= cl() mul!(state.At_grad_f_Ax, iter.A', state.grad_f_Ax) state.y .= state.x .- state.gamma .* state.At_grad_f_Ax state.g_xbar = prox!(state.xbar, iter.g, state.y, state.gamma) diff --git a/src/utilities/ad.jl b/src/utilities/ad.jl deleted file mode 100644 index f1e43b7..0000000 --- a/src/utilities/ad.jl +++ /dev/null @@ -1,14 +0,0 @@ -using Zygote: pullback -using ProximalCore - -struct ZygoteFunction{F} - f::F -end - -(f::ZygoteFunction)(x) = f.f(x) - -function ProximalCore.gradient!(grad, f::ZygoteFunction, x) - fx, pb = pullback(f.f, x) - grad .= pb(one(fx))[1] - return fx -end diff --git a/src/utilities/fb_tools.jl b/src/utilities/fb_tools.jl index 34d6980..e2acff4 100644 --- a/src/utilities/fb_tools.jl +++ b/src/utilities/fb_tools.jl @@ -7,29 +7,29 @@ end function lower_bound_smoothness_constant(f, A, x, grad_f_Ax) R = real(eltype(x)) xeps = x .+ 1 - grad_f_Axeps, _ = gradient(f, A * xeps) + f_Axeps, cl = value_and_gradient_closure(f, A * xeps) + grad_f_Axeps = cl() return norm(A' * (grad_f_Axeps - grad_f_Ax)) / R(sqrt(length(x))) end function lower_bound_smoothness_constant(f, A, x) + R = real(eltype(x)) Ax = A * x - grad_f_Ax, _ = gradient(f, Ax) + f_Ax, cl = value_and_gradient_closure(f, Ax) + grad_f_Ax = cl() return lower_bound_smoothness_constant(f, A, x, grad_f_Ax) end _mul!(y, L, x) = mul!(y, L, x) _mul!(y, ::Nothing, x) = return -_gradient!(y, f, x) = gradient!(y, f, x) -_gradient!(::Nothing, f, x) = f(x) - function backtrack_stepsize!( gamma::R, f, A, g, x, f_Ax::R, At_grad_f_Ax, y, z, g_z::R, res, Az, grad_f_Az=nothing; alpha = 1, minimum_gamma = 1e-7 ) where R f_Az_upp = f_model(f_Ax, At_grad_f_Ax, res, alpha / gamma) _mul!(Az, A, z) - f_Az = _gradient!(grad_f_Az, f, Az) + f_Az, cl = value_and_gradient_closure(f, Az) tol = 10 * eps(R) * (1 + abs(f_Az)) while f_Az > f_Az_upp + tol && gamma >= minimum_gamma gamma /= 2 @@ -38,9 +38,12 @@ function 
backtrack_stepsize!( res .= x .- z f_Az_upp = f_model(f_Ax, At_grad_f_Ax, res, alpha / gamma) _mul!(Az, A, z) - f_Az = _gradient!(grad_f_Az, f, Az) + f_Az, cl = value_and_gradient_closure(f, Az) tol = 10 * eps(R) * (1 + abs(f_Az)) end + if grad_f_Az !== nothing + grad_f_Az .= cl() + end if gamma < minimum_gamma @warn "stepsize `gamma` became too small ($(gamma))" end @@ -51,7 +54,8 @@ function backtrack_stepsize!( gamma, f, A, g, x; alpha = 1, minimum_gamma = 1e-7 ) Ax = A * x - grad_f_Ax, f_Ax = gradient(f, Ax) + f_Ax, cl = value_and_gradient_closure(f, Ax) + grad_f_Ax = cl() At_grad_f_Ax = A' * grad_f_Ax y = x - gamma .* At_grad_f_Ax z, g_z = prox(g, y, gamma) diff --git a/test/Project.toml b/test/Project.toml index ea59c93..7241417 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,10 +1,14 @@ [deps] +AbstractDifferentiation = "c29ec348-61ec-40c8-8164-b8c60e9d9f3d" AbstractOperators = "d9c5613a-d543-52d8-9afd-8f241a8c3f1c" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" ProximalCore = "dc4f5ac2-75d1-4f31-931e-60435d74994b" ProximalOperators = "a725b495-10eb-56fe-b38b-717eba820537" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd" +ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" diff --git a/test/definitions/arraypartition.jl b/test/definitions/arraypartition.jl deleted file mode 100644 index 87dcb78..0000000 --- a/test/definitions/arraypartition.jl +++ /dev/null @@ -1,20 +0,0 @@ -import ProximalCore -import RecursiveArrayTools - -@inline function ProximalCore.prox(h, x::RecursiveArrayTools.ArrayPartition, gamma...) - # unwrap - y, fy = ProximalCore.prox(h, x.x, gamma...) - # wrap - return RecursiveArrayTools.ArrayPartition(y), fy -end - -@inline function ProximalCore.gradient(h, x::RecursiveArrayTools.ArrayPartition) - # unwrap - grad, fx = ProximalCore.gradient(h, x.x) - # wrap - return RecursiveArrayTools.ArrayPartition(grad), fx -end - -@inline ProximalCore.prox!(y::RecursiveArrayTools.ArrayPartition, h, x::RecursiveArrayTools.ArrayPartition, gamma...) = ProximalCore.prox!(y.x, h, x.x, gamma...) 
- -@inline ProximalCore.gradient!(y::RecursiveArrayTools.ArrayPartition, h, x::RecursiveArrayTools.ArrayPartition) = ProximalCore.gradient!(y.x, h, x.x) diff --git a/test/definitions/compose.jl b/test/definitions/compose.jl deleted file mode 100644 index 8e38c2a..0000000 --- a/test/definitions/compose.jl +++ /dev/null @@ -1,29 +0,0 @@ -using RecursiveArrayTools: ArrayPartition -using ProximalCore - -struct ComposeAffine - f - A - b -end - -function (g::ComposeAffine)(x) - res = g.A * x .+ g.b - return g.f(res) -end - -function compose_affine_gradient!(y, g::ComposeAffine, x) - res = g.A * x .+ g.b - gradres, v = gradient(g.f, res) - mul!(y, adjoint(g.A), gradres) - return v -end - -ProximalCore.gradient!(y, g::ComposeAffine, x) = compose_affine_gradient!(y, g, x) -ProximalCore.gradient!(y::ArrayPartition, g::ComposeAffine, x::ArrayPartition) = compose_affine_gradient!(y, g, x) - -function ProximalCore.gradient(h::ComposeAffine, x::ArrayPartition) - grad_fx = similar(x) - fx = ProximalCore.gradient!(grad_fx, h, x) - return grad_fx, fx -end diff --git a/test/problems/test_elasticnet.jl b/test/problems/test_elasticnet.jl index 72e52b8..ffa376a 100644 --- a/test/problems/test_elasticnet.jl +++ b/test/problems/test_elasticnet.jl @@ -1,8 +1,10 @@ -@testset "Elastic net ($T)" for T in [Float32, Float64, ComplexF32, ComplexF64] - using ProximalOperators - using ProximalAlgorithms - using LinearAlgebra +using LinearAlgebra +using ProximalOperators: NormL1, SqrNormL2, ElasticNet, Translate +using ProximalAlgorithms +using Zygote +using AbstractDifferentiation: ZygoteBackend +@testset "Elastic net ($T)" for T in [Float32, Float64, ComplexF32, ComplexF64] A = T[ 1.0 -2.0 3.0 -4.0 5.0 2.0 -1.0 0.0 -1.0 3.0 @@ -19,7 +21,7 @@ reg1 = NormL1(R(1)) reg2 = SqrNormL2(R(1)) loss = Translate(SqrNormL2(R(1)), -b) - cost = LeastSquares(A, b) + cost = ProximalAlgorithms.AutoDifferentiable(x -> (norm(A*x - b)^2) / 2, ZygoteBackend()) L = opnorm(A)^2 @@ -52,7 +54,7 @@ afba_test_params = [ (2, 0, 130), - (1, 1, 1890), + (1, 1, 2000), (0, 1, 320), (0, 0, 194), (1, 0, 130), @@ -69,7 +71,7 @@ solver = ProximalAlgorithms.AFBA(theta = theta, mu = mu, tol = R(1e-6)) (x_afba, y_afba), it_afba = - solver(x0 = x0, y0 = y0, f = reg2, g = reg1, h = loss, L = A, beta_f = 1) + solver(x0 = x0, y0 = y0, f = ProximalAlgorithms.AutoDifferentiable(reg2, ZygoteBackend()), g = reg1, h = loss, L = A, beta_f = 1) @test eltype(x_afba) == T @test eltype(y_afba) == T @test norm(x_afba - x_star, Inf) <= 1e-4 @@ -86,7 +88,7 @@ solver = ProximalAlgorithms.AFBA(theta = theta, mu = mu, tol = R(1e-6)) (x_afba, y_afba), it_afba = - solver(x0 = x0, y0 = y0, f = reg2, g = reg1, h = loss, L = A, beta_f = 1) + solver(x0 = x0, y0 = y0, f = ProximalAlgorithms.AutoDifferentiable(reg2, ZygoteBackend()), g = reg1, h = loss, L = A, beta_f = 1) @test eltype(x_afba) == T @test eltype(y_afba) == T @test norm(x_afba - x_star, Inf) <= 1e-4 diff --git a/test/problems/test_equivalence.jl b/test/problems/test_equivalence.jl index 004d618..de8f970 100644 --- a/test/problems/test_equivalence.jl +++ b/test/problems/test_equivalence.jl @@ -1,7 +1,8 @@ using LinearAlgebra using Test - -using ProximalOperators +using Zygote +using AbstractDifferentiation: ZygoteBackend +using ProximalOperators: LeastSquares, NormL1 using ProximalAlgorithms: DouglasRachfordIteration, DRLSIteration, ForwardBackwardIteration, PANOCIteration, @@ -51,7 +52,7 @@ end lam = R(0.1) * norm(A' * b, Inf) - f = LeastSquares(A, b) + f = ProximalAlgorithms.AutoDifferentiable(x -> (norm(A*x - b)^2) / 2, 
ZygoteBackend()) g = NormL1(lam) x0 = zeros(R, n) @@ -79,7 +80,7 @@ end lam = R(0.1) * norm(A' * b, Inf) - f = LeastSquares(A, b) + f = ProximalAlgorithms.AutoDifferentiable(x -> (norm(A*x - b)^2) / 2, ZygoteBackend()) g = NormL1(lam) x0 = zeros(R, n) diff --git a/test/problems/test_lasso_small.jl b/test/problems/test_lasso_small.jl index ba0d69d..2f1c387 100644 --- a/test/problems/test_lasso_small.jl +++ b/test/problems/test_lasso_small.jl @@ -1,7 +1,9 @@ using LinearAlgebra using Test -using ProximalOperators +using Zygote +using AbstractDifferentiation: ZygoteBackend +using ProximalOperators: NormL1, LeastSquares using ProximalAlgorithms using ProximalAlgorithms: LBFGS, Broyden, AndersonAcceleration, @@ -23,8 +25,10 @@ using ProximalAlgorithms: lam = R(0.1) * norm(A' * b, Inf) @test typeof(lam) == R - f = Translate(SqrNormL2(R(1)), -b) - f2 = LeastSquares(A, b) + f_autodiff = ProximalAlgorithms.AutoDifferentiable(x -> (norm(x - b)^2)/2, ZygoteBackend()) + fA_autodiff = ProximalAlgorithms.AutoDifferentiable(x -> (norm(A*x - b)^2)/2, ZygoteBackend()) + f_prox = Translate(SqrNormL2(R(1)), -b) + fA_prox = LeastSquares(A, b) g = NormL1(lam) Lf = opnorm(A)^2 @@ -37,7 +41,7 @@ using ProximalAlgorithms: x0 = zeros(T, n) x0_backup = copy(x0) solver = ProximalAlgorithms.ForwardBackward(tol = TOL) - x, it = @inferred solver(x0 = x0, f = f2, g = g, Lf = Lf) + x, it = @inferred solver(x0 = x0, f = fA_autodiff, g = g, Lf = Lf) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 150 @@ -48,7 +52,7 @@ using ProximalAlgorithms: x0 = zeros(T, n) x0_backup = copy(x0) solver = ProximalAlgorithms.ForwardBackward(tol = TOL, adaptive = true) - x, it = @inferred solver(x0 = x0, f = f2, g = g) + x, it = @inferred solver(x0 = x0, f = fA_autodiff, g = g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 300 @@ -59,7 +63,7 @@ using ProximalAlgorithms: x0 = zeros(T, n) x0_backup = copy(x0) solver = ProximalAlgorithms.FastForwardBackward(tol = TOL) - x, it = @inferred solver(x0 = x0, f = f2, g = g, Lf = Lf) + x, it = @inferred solver(x0 = x0, f = fA_autodiff, g = g, Lf = Lf) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 100 @@ -71,7 +75,7 @@ using ProximalAlgorithms: x0_backup = copy(x0) solver = ProximalAlgorithms.FastForwardBackward(tol = TOL, adaptive = true) - x, it = @inferred solver(x0 = x0, f = f2, g = g) + x, it = @inferred solver(x0 = x0, f = fA_autodiff, g = g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 200 @@ -82,7 +86,7 @@ using ProximalAlgorithms: x0 = zeros(T, n) x0_backup = copy(x0) solver = ProximalAlgorithms.FastForwardBackward(tol = TOL) - x, it = @inferred solver(x0 = x0, f = f2, g = g, Lf = Lf, extrapolation_sequence=FixedNesterovSequence(real(T))) + x, it = @inferred solver(x0 = x0, f = fA_autodiff, g = g, Lf = Lf, extrapolation_sequence=FixedNesterovSequence(real(T))) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 100 @@ -93,7 +97,7 @@ using ProximalAlgorithms: x0 = zeros(T, n) x0_backup = copy(x0) solver = ProximalAlgorithms.ZeroFPR(tol = TOL) - x, it = @inferred solver(x0 = x0, f = f, A = A, g = g, Lf = Lf) + x, it = @inferred solver(x0 = x0, f = f_autodiff, A = A, g = g, Lf = Lf) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 @@ -104,7 +108,7 @@ using ProximalAlgorithms: x0 = zeros(T, n) x0_backup = copy(x0) solver = ProximalAlgorithms.ZeroFPR(adaptive = true, tol = TOL) - x, it = @inferred solver(x0 = x0, f = f, A = A, g = g) + x, it = @inferred solver(x0 = x0, f = 
f_autodiff, A = A, g = g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 @@ -116,7 +120,7 @@ using ProximalAlgorithms: x0 = zeros(T, n) x0_backup = copy(x0) solver = ProximalAlgorithms.PANOC(tol = TOL) - x, it = @inferred solver(x0 = x0, f = f, A = A, g = g, Lf = Lf) + x, it = @inferred solver(x0 = x0, f = f_autodiff, A = A, g = g, Lf = Lf) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 @@ -128,7 +132,7 @@ using ProximalAlgorithms: x0 = zeros(T, n) x0_backup = copy(x0) solver = ProximalAlgorithms.PANOC(adaptive = true, tol = TOL) - x, it = @inferred solver(x0 = x0, f = f, A = A, g = g) + x, it = @inferred solver(x0 = x0, f = f_autodiff, A = A, g = g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 @@ -139,7 +143,7 @@ using ProximalAlgorithms: x0 = zeros(T, n) x0_backup = copy(x0) solver = ProximalAlgorithms.PANOCplus(tol = TOL) - x, it = @inferred solver(x0 = x0, f = f, A = A, g = g, Lf = Lf) + x, it = @inferred solver(x0 = x0, f = f_autodiff, A = A, g = g, Lf = Lf) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 @@ -150,7 +154,7 @@ using ProximalAlgorithms: x0 = zeros(T, n) x0_backup = copy(x0) solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL) - x, it = @inferred solver(x0 = x0, f = f, A = A, g = g) + x, it = @inferred solver(x0 = x0, f = f_autodiff, A = A, g = g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 @@ -161,7 +165,7 @@ using ProximalAlgorithms: x0 = zeros(T, n) x0_backup = copy(x0) solver = ProximalAlgorithms.DouglasRachford(gamma = R(10) / opnorm(A)^2, tol = TOL) - y, it = @inferred solver(x0 = x0, f = f2, g = g) + y, it = @inferred solver(x0 = x0, f = fA_prox, g = g) @test eltype(y) == T @test norm(y - x_star, Inf) <= TOL @test it < 30 @@ -178,7 +182,7 @@ using ProximalAlgorithms: x0 = zeros(T, n) x0_backup = copy(x0) solver = ProximalAlgorithms.DRLS(tol = 10 * TOL, directions=acc) - z, it = @inferred solver(x0 = x0, f = f2, g = g, Lf = Lf) + z, it = @inferred solver(x0 = x0, f = fA_prox, g = g, Lf = Lf) @test eltype(z) == T @test norm(z - x_star, Inf) <= 10 * TOL @test it < maxit @@ -189,7 +193,7 @@ using ProximalAlgorithms: x0 = zeros(T, n) x0_backup = copy(x0) solver = ProximalAlgorithms.AFBA(theta = 1, mu = 1, tol = R(1e-6)) - (x_afba, y_afba), it_afba = @inferred solver(x0 = x0, y0 = zeros(T, n), f = f2, g = g, beta_f = opnorm(A)^2) + (x_afba, y_afba), it_afba = @inferred solver(x0 = x0, y0 = zeros(T, n), f = fA_autodiff, g = g, beta_f = opnorm(A)^2) @test eltype(x_afba) == T @test eltype(y_afba) == T @test norm(x_afba - x_star, Inf) <= 1e-4 @@ -197,7 +201,7 @@ using ProximalAlgorithms: @test x0 == x0_backup solver = ProximalAlgorithms.AFBA(theta = 1, mu = 1, tol = R(1e-6)) - (x_afba, y_afba), it_afba = @inferred solver(x0 = x0, y0 = zeros(T, n), f = f2, h = g, beta_f = opnorm(A)^2) + (x_afba, y_afba), it_afba = @inferred solver(x0 = x0, y0 = zeros(T, n), f = fA_autodiff, h = g, beta_f = opnorm(A)^2) @test eltype(x_afba) == T @test eltype(y_afba) == T @test norm(x_afba - x_star, Inf) <= 1e-4 @@ -205,7 +209,7 @@ using ProximalAlgorithms: @test x0 == x0_backup solver = ProximalAlgorithms.AFBA(theta = 1, mu = 1, tol = R(1e-6)) - (x_afba, y_afba), it_afba = @inferred solver(x0 = x0, y0 = zeros(T, m), h = f, L = A, g = g) + (x_afba, y_afba), it_afba = @inferred solver(x0 = x0, y0 = zeros(T, m), h = f_prox, L = A, g = g) @test eltype(x_afba) == T @test eltype(y_afba) == T @test norm(x_afba - x_star, Inf) <= 1e-4 @@ -217,7 +221,7 @@ using 
ProximalAlgorithms: x0 = zeros(T, n) x0_backup = copy(x0) solver = ProximalAlgorithms.SFISTA(tol = 10 * TOL) - y, it = @inferred solver(x0 = x0, f = f2, g = g, Lf = Lf) + y, it = @inferred solver(x0 = x0, f = fA_autodiff, g = g, Lf = Lf) @test eltype(y) == T @test norm(y - x_star, Inf) <= 10 * TOL @test it < 100 diff --git a/test/problems/test_lasso_small_h_split.jl b/test/problems/test_lasso_small_h_split.jl deleted file mode 100644 index 54d0fcf..0000000 --- a/test/problems/test_lasso_small_h_split.jl +++ /dev/null @@ -1,171 +0,0 @@ -@testset "Lasso small (h. split, $T)" for T in [Float32, Float64, ComplexF32, ComplexF64] - using ProximalOperators - using ProximalAlgorithms - using LinearAlgebra - using AbstractOperators: MatrixOp - using RecursiveArrayTools: ArrayPartition - - A1 = T[ - 1.0 -2.0 3.0 - 2.0 -1.0 0.0 - -1.0 0.0 4.0 - -1.0 -1.0 -1.0 - ] - A2 = T[ - -4.0 5.0 - -1.0 3.0 - -3.0 2.0 - 1.0 3.0 - ] - A = hcat(MatrixOp(A1), MatrixOp(A2)) - b = T[1.0, 2.0, 3.0, 4.0] - - m, n1 = size(A1) - _, n2 = size(A2) - n = n1 + n2 - - R = real(T) - - lam = R(0.1) * norm(A' * b, Inf) - @test typeof(lam) == R - - f = Translate(SqrNormL2(R(1)), -b) - f2 = ComposeAffine(SqrNormL2(R(1)), A, -b) - g = SeparableSum(NormL1(lam), NormL1(lam)) - - Lf = opnorm([A1 A2])^2 - - x_star = ArrayPartition( - T[-3.877278911564627e-01, 0, 0], - T[2.174149659863943e-02, 6.168435374149660e-01], - ) - - TOL = R(1e-4) - - @testset "ForwardBackward" begin - - ## Nonfast/Nonadaptive - - x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) - x0_backup = copy(x0) - solver = ProximalAlgorithms.ForwardBackward(tol = TOL) - x, it = solver(x0 = x0, f = f2, g = g, Lf = Lf) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= TOL - @test it < 150 - @test x0 == x0_backup - - # Nonfast/Adaptive - - x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) - x0_backup = copy(x0) - solver = ProximalAlgorithms.ForwardBackward(tol = TOL, adaptive = true) - x, it = solver(x0 = x0, f = f2, g = g) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= TOL - @test it < 300 - @test x0 == x0_backup - - # Fast/Nonadaptive - - x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) - x0_backup = copy(x0) - solver = ProximalAlgorithms.FastForwardBackward(tol = TOL) - x, it = solver(x0 = x0, f = f2, g = g, Lf = Lf) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= TOL - @test it < 100 - @test x0 == x0_backup - - # Fast/Adaptive - - x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) - x0_backup = copy(x0) - solver = - ProximalAlgorithms.FastForwardBackward(tol = TOL, adaptive = true) - x, it = solver(x0 = x0, f = f2, g = g) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= TOL - @test it < 200 - @test x0 == x0_backup - end - - @testset "ZeroFPR" begin - - # ZeroFPR/Nonadaptive - - x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) - x0_backup = copy(x0) - solver = ProximalAlgorithms.ZeroFPR(tol = TOL) - x, it = solver(x0 = x0, f = f, A = A, g = g, Lf = Lf) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= TOL - @test it < 20 - @test x0 == x0_backup - - # ZeroFPR/Adaptive - - x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) - x0_backup = copy(x0) - solver = ProximalAlgorithms.ZeroFPR(adaptive = true, tol = TOL) - x, it = solver(x0 = x0, f = f, A = A, g = g) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= TOL - @test it < 20 - @test x0 == x0_backup - - end - - @testset "PANOC" begin - - # PANOC/Nonadaptive - - x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) - x0_backup = copy(x0) - solver = ProximalAlgorithms.PANOC(tol = TOL) - x, it = solver(x0 
= x0, f = f, A = A, g = g, Lf = Lf) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= TOL - @test it < 20 - @test x0 == x0_backup - - ## PANOC/Adaptive - - x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) - x0_backup = copy(x0) - solver = ProximalAlgorithms.PANOC(adaptive = true, tol = TOL) - x, it = solver(x0 = x0, f = f, A = A, g = g) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= TOL - @test it < 20 - @test x0 == x0_backup - - end - - @testset "PANOCplus" begin - - # PANOCplus/Nonadaptive - - x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) - x0_backup = copy(x0) - solver = ProximalAlgorithms.PANOCplus(tol = TOL) - x, it = solver(x0 = x0, f = f, A = A, g = g, Lf = Lf) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= TOL - @test it < 20 - @test x0 == x0_backup - - ## PANOCplus/Adaptive - - x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) - x0_backup = copy(x0) - solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL) - x, it = solver(x0 = x0, f = f, A = A, g = g) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= TOL - @test it < 20 - @test x0 == x0_backup - - end - -end diff --git a/test/problems/test_lasso_small_strongly_convex.jl b/test/problems/test_lasso_small_strongly_convex.jl index 7dd959b..d2e7e13 100644 --- a/test/problems/test_lasso_small_strongly_convex.jl +++ b/test/problems/test_lasso_small_strongly_convex.jl @@ -1,7 +1,9 @@ using LinearAlgebra using Test -using ProximalOperators +using Zygote +using AbstractDifferentiation: ZygoteBackend +using ProximalOperators: NormL1, LeastSquares using ProximalAlgorithms @testset "Lasso small (strongly convex, $T)" for T in [Float32, Float64] @@ -29,7 +31,8 @@ using ProximalAlgorithms A = Q * D * Q' b = A * x_star + lam * inv(A') * sign.(x_star) - f = LeastSquares(A, b) + fA_prox = LeastSquares(A, b) + fA_autodiff = ProximalAlgorithms.AutoDifferentiable(x -> (norm(A*x - b)^2)/2, ZygoteBackend()) g = NormL1(lam) TOL = T(1e-4) @@ -39,7 +42,7 @@ using ProximalAlgorithms @testset "SFISTA" begin solver = ProximalAlgorithms.SFISTA(tol = TOL) - y, it = solver(x0=x0, f=f, g=g, Lf=Lf, mf=mf) + y, it = solver(x0=x0, f=fA_autodiff, g=g, Lf=Lf, mf=mf) @test eltype(y) == T @test norm(y - x_star) <= TOL @test it < 40 @@ -48,7 +51,7 @@ using ProximalAlgorithms @testset "ForwardBackward" begin solver = ProximalAlgorithms.ForwardBackward(tol = TOL) - y, it = solver(x0=x0, f=f, g=g, Lf=Lf) + y, it = solver(x0=x0, f=fA_autodiff, g=g, Lf=Lf) @test eltype(y) == T @test norm(y - x_star, Inf) <= TOL @test it < 110 @@ -57,7 +60,7 @@ using ProximalAlgorithms @testset "FastForwardBackward" begin solver = ProximalAlgorithms.FastForwardBackward(tol = TOL) - y, it = solver(x0=x0, f=f, g=g, Lf=Lf, mf=mf) + y, it = solver(x0=x0, f=fA_autodiff, g=g, Lf=Lf, mf=mf) @test eltype(y) == T @test norm(y - x_star, Inf) <= TOL @test it < 35 @@ -66,7 +69,7 @@ using ProximalAlgorithms @testset "FastForwardBackward (custom extrapolation)" begin solver = ProximalAlgorithms.FastForwardBackward(tol = TOL) - y, it = solver(x0=x0, f=f, g=g, gamma = 1/Lf, mf=mf, extrapolation_sequence=ProximalAlgorithms.ConstantNesterovSequence(mf, 1/Lf)) + y, it = solver(x0=x0, f=fA_autodiff, g=g, gamma = 1/Lf, mf=mf, extrapolation_sequence=ProximalAlgorithms.ConstantNesterovSequence(mf, 1/Lf)) @test eltype(y) == T @test norm(y - x_star, Inf) <= TOL @test it < 35 @@ -75,7 +78,7 @@ using ProximalAlgorithms @testset "DRLS" begin solver = ProximalAlgorithms.DRLS(tol = TOL) - v, it = solver(x0=x0, f=f, g=g, mf=mf) + v, it = solver(x0=x0, f=fA_prox, g=g, mf=mf) @test 
eltype(v) == T @test norm(v - x_star, Inf) <= TOL @test it < 14 @@ -84,7 +87,7 @@ using ProximalAlgorithms @testset "PANOC" begin solver = ProximalAlgorithms.PANOC(tol = TOL) - y, it = solver(x0=x0, f=f, g=g, Lf=Lf) + y, it = solver(x0=x0, f=fA_autodiff, g=g, Lf=Lf) @test eltype(y) == T @test norm(y - x_star, Inf) <= TOL @test it < 45 @@ -93,7 +96,7 @@ using ProximalAlgorithms @testset "PANOCplus" begin solver = ProximalAlgorithms.PANOCplus(tol = TOL) - y, it = solver(x0=x0, f=f, g=g, Lf=Lf) + y, it = solver(x0=x0, f=fA_autodiff, g=g, Lf=Lf) @test eltype(y) == T @test norm(y - x_star, Inf) <= TOL @test it < 45 diff --git a/test/problems/test_lasso_small_v_split.jl b/test/problems/test_lasso_small_v_split.jl deleted file mode 100644 index 855df8b..0000000 --- a/test/problems/test_lasso_small_v_split.jl +++ /dev/null @@ -1,164 +0,0 @@ -@testset "Lasso small (v. split, $T)" for T in [Float32, Float64, ComplexF32, ComplexF64] - using ProximalOperators - using ProximalAlgorithms - using LinearAlgebra - using AbstractOperators: MatrixOp - - A1 = T[ - 1.0 -2.0 3.0 -4.0 5.0 - 2.0 -1.0 0.0 -1.0 3.0 - ] - A2 = T[ - -1.0 0.0 4.0 -3.0 2.0 - -1.0 -1.0 -1.0 1.0 3.0 - ] - A = vcat(MatrixOp(A1), MatrixOp(A2)) - b1 = T[1.0, 2.0] - b2 = T[3.0, 4.0] - - m1, n = size(A1) - m2, _ = size(A2) - m = m1 + m2 - - R = real(T) - - lam = R(0.1) * norm([A1; A2]' * [b1; b2], Inf) - @test typeof(lam) == R - - f = SeparableSum(Translate(SqrNormL2(R(1)), -b1), Translate(SqrNormL2(R(1)), -b2)) - f2 = Sum(LeastSquares(A1, b1), LeastSquares(A2, b2)) - g = NormL1(lam) - - Lf = opnorm([A1; A2])^2 - - x_star = T[-3.877278911564627e-01, 0, 0, 2.174149659863943e-02, 6.168435374149660e-01] - - TOL = R(1e-4) - - @testset "ForwardBackward" begin - - ## Nonfast/Nonadaptive - - x0 = zeros(T, n) - x0_backup = copy(x0) - solver = ProximalAlgorithms.ForwardBackward(tol = TOL) - x, it = solver(x0 = x0, f = f2, g = g, Lf = Lf) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= TOL - @test it < 150 - @test x0 == x0_backup - - # Nonfast/Adaptive - - x0 = zeros(T, n) - x0_backup = copy(x0) - solver = ProximalAlgorithms.ForwardBackward(tol = TOL, adaptive = true) - x, it = solver(x0 = x0, f = f2, g = g) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= TOL - @test it < 300 - @test x0 == x0_backup - - # Fast/Nonadaptive - - x0 = zeros(T, n) - x0_backup = copy(x0) - solver = ProximalAlgorithms.FastForwardBackward(tol = TOL) - x, it = solver(x0 = x0, f = f2, g = g, Lf = Lf) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= TOL - @test it < 100 - @test x0 == x0_backup - - # Fast/Adaptive - - x0 = zeros(T, n) - x0_backup = copy(x0) - solver = - ProximalAlgorithms.FastForwardBackward(tol = TOL, adaptive = true) - x, it = solver(x0 = x0, f = f2, g = g) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= TOL - @test it < 200 - @test x0 == x0_backup - end - - @testset "ZeroFPR" begin - - # ZeroFPR/Nonadaptive - - x0 = zeros(T, n) - x0_backup = copy(x0) - solver = ProximalAlgorithms.ZeroFPR(tol = TOL) - x, it = solver(x0 = x0, f = f, A = A, g = g, Lf = opnorm([A1; A2])^2) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= TOL - @test it < 20 - @test x0 == x0_backup - - # ZeroFPR/Adaptive - - x0 = zeros(T, n) - x0_backup = copy(x0) - solver = ProximalAlgorithms.ZeroFPR(adaptive = true, tol = TOL) - x, it = solver(x0 = x0, f = f, A = A, g = g) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= TOL - @test it < 20 - @test x0 == x0_backup - - end - - @testset "PANOC" begin - - # PANOC/Nonadaptive - - x0 = zeros(T, n) - 
x0_backup = copy(x0) - solver = ProximalAlgorithms.PANOC(tol = TOL) - x, it = solver(x0 = x0, f = f, A = A, g = g, Lf = opnorm([A1; A2])^2) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= TOL - @test it < 20 - @test x0 == x0_backup - - ## PANOC/Adaptive - - x0 = zeros(T, n) - x0_backup = copy(x0) - solver = ProximalAlgorithms.PANOC(adaptive = true, tol = TOL) - x, it = solver(x0 = x0, f = f, A = A, g = g) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= TOL - @test it < 20 - @test x0 == x0_backup - - end - - @testset "PANOCplus" begin - - # PANOCplus/Nonadaptive - - x0 = zeros(T, n) - x0_backup = copy(x0) - solver = ProximalAlgorithms.PANOCplus(tol = TOL) - x, it = solver(x0 = x0, f = f, A = A, g = g, Lf = opnorm([A1; A2])^2) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= TOL - @test it < 20 - @test x0 == x0_backup - - ## PANOCplus/Adaptive - - x0 = zeros(T, n) - x0_backup = copy(x0) - solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL) - x, it = solver(x0 = x0, f = f, A = A, g = g) - @test eltype(x) == T - @test norm(x - x_star, Inf) <= TOL - @test it < 20 - @test x0 == x0_backup - - end - -end diff --git a/test/problems/test_linear_programs.jl b/test/problems/test_linear_programs.jl index 51e69c6..30f82e5 100644 --- a/test/problems/test_linear_programs.jl +++ b/test/problems/test_linear_programs.jl @@ -1,4 +1,6 @@ -using ProximalOperators +using Zygote +using AbstractDifferentiation: ZygoteBackend +using ProximalOperators: Linear, IndNonnegative, IndPoint, IndAffine, SlicedSeparableSum using ProximalAlgorithms using LinearAlgebra @@ -68,7 +70,7 @@ end @testset "AFBA" begin - f = Linear(c) + f = ProximalAlgorithms.AutoDifferentiable(x -> dot(c, x), ZygoteBackend()) g = IndNonnegative() h = IndPoint(b) @@ -94,7 +96,7 @@ end @testset "VuCondat" begin - f = Linear(c) + f = ProximalAlgorithms.AutoDifferentiable(x -> dot(c, x), ZygoteBackend()) g = IndNonnegative() h = IndPoint(b) @@ -143,7 +145,7 @@ end @testset "DavisYin" begin - f = Linear(c) + f = ProximalAlgorithms.AutoDifferentiable(x -> dot(c, x), ZygoteBackend()) g = IndNonnegative() h = IndAffine(A, b) diff --git a/test/problems/test_nonconvex_qp.jl b/test/problems/test_nonconvex_qp.jl index dd3d2e5..6f1dfca 100644 --- a/test/problems/test_nonconvex_qp.jl +++ b/test/problems/test_nonconvex_qp.jl @@ -1,5 +1,7 @@ +using Zygote +using AbstractDifferentiation: ZygoteBackend using ProximalAlgorithms -using ProximalOperators +using ProximalOperators: IndBox using LinearAlgebra using Random using Test @@ -10,7 +12,7 @@ using Test low = T(-1.0) upp = T(+1.0) - f = Quadratic(Q, q) + f = ProximalAlgorithms.AutoDifferentiable(x -> dot(Q * x, x) / 2 + dot(q, x), ZygoteBackend()) g = IndBox(low, upp) n = 2 @@ -76,7 +78,7 @@ end low = T(-1.0) upp = T(+1.0) - f = Quadratic(Q, q) + f = ProximalAlgorithms.AutoDifferentiable(x -> dot(Q * x, x) / 2 + dot(q, x), ZygoteBackend()) g = IndBox(low, upp) Lip = maximum(abs.(eigenvalues)) diff --git a/test/problems/test_sparse_logistic_small.jl b/test/problems/test_sparse_logistic_small.jl index 47a26d0..135fee5 100644 --- a/test/problems/test_sparse_logistic_small.jl +++ b/test/problems/test_sparse_logistic_small.jl @@ -1,8 +1,10 @@ -@testset "Sparse logistic small ($T)" for T in [Float32, Float64] - using ProximalOperators - using ProximalAlgorithms - using LinearAlgebra +using Zygote +using AbstractDifferentiation: ZygoteBackend +using ProximalOperators: NormL1 +using ProximalAlgorithms +using LinearAlgebra +@testset "Sparse logistic small ($T)" for T in [Float32, 
Float64] A = T[ 1.0 -2.0 3.0 -4.0 5.0 2.0 -1.0 0.0 -1.0 3.0 @@ -15,8 +17,13 @@ R = real(T) - f = Translate(LogisticLoss(ones(R, m), R(1)), -b) - f2 = ComposeAffine(LogisticLoss(ones(R, m), R(1)), A, -b) + function logistic_loss(logits) + u = 1 .+ exp.(-logits) # labels are assumed all one + return sum(log.(u)) + end + + f_autodiff = ProximalAlgorithms.AutoDifferentiable(x -> logistic_loss(x - b), ZygoteBackend()) + fA_autodiff = ProximalAlgorithms.AutoDifferentiable(x -> logistic_loss(A * x - b), ZygoteBackend()) lam = R(0.1) g = NormL1(lam) @@ -29,7 +36,7 @@ x0 = zeros(T, n) x0_backup = copy(x0) solver = ProximalAlgorithms.ForwardBackward(tol = TOL, adaptive = true) - x, it = solver(x0 = x0, f = f2, g = g) + x, it = solver(x0 = x0, f = fA_autodiff, g = g) @test eltype(x) == T @test norm(x - x_star, Inf) <= 1e-4 @test it < 1100 @@ -40,7 +47,7 @@ x0 = zeros(T, n) x0_backup = copy(x0) solver = ProximalAlgorithms.FastForwardBackward(tol = TOL, adaptive = true) - x, it = solver(x0 = x0, f = f2, g = g) + x, it = solver(x0 = x0, f = fA_autodiff, g = g) @test eltype(x) == T @test norm(x - x_star, Inf) <= 1e-4 @test it < 500 @@ -51,7 +58,7 @@ x0 = zeros(T, n) x0_backup = copy(x0) solver = ProximalAlgorithms.ZeroFPR(adaptive = true, tol = TOL) - x, it = solver(x0 = x0, f = f, A = A, g = g) + x, it = solver(x0 = x0, f = f_autodiff, A = A, g = g) @test eltype(x) == T @test norm(x - x_star, Inf) <= 1e-4 @test it < 25 @@ -62,7 +69,7 @@ x0 = zeros(T, n) x0_backup = copy(x0) solver = ProximalAlgorithms.PANOC(adaptive = true, tol = TOL) - x, it = solver(x0 = x0, f = f, A = A, g = g) + x, it = solver(x0 = x0, f = f_autodiff, A = A, g = g) @test eltype(x) == T @test norm(x - x_star, Inf) <= 1e-4 @test it < 50 @@ -73,7 +80,7 @@ x0 = zeros(T, n) x0_backup = copy(x0) solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL) - x, it = solver(x0 = x0, f = f, A = A, g = g) + x, it = solver(x0 = x0, f = f_autodiff, A = A, g = g) @test eltype(x) == T @test norm(x - x_star, Inf) <= 1e-4 @test it < 50 diff --git a/test/problems/test_verbose.jl b/test/problems/test_verbose.jl index ff942c1..573d7fe 100644 --- a/test/problems/test_verbose.jl +++ b/test/problems/test_verbose.jl @@ -1,8 +1,10 @@ -@testset "Verbose" for T in [Float64] - using ProximalOperators - using ProximalAlgorithms - using LinearAlgebra +using Zygote +using AbstractDifferentiation: ZygoteBackend +using ProximalOperators: LeastSquares, NormL1 +using ProximalAlgorithms +using LinearAlgebra +@testset "Verbose" for T in [Float64] A = T[ 1.0 -2.0 3.0 -4.0 5.0 2.0 -1.0 0.0 -1.0 3.0 @@ -18,8 +20,9 @@ lam = R(0.1) * norm(A' * b, Inf) @test typeof(lam) == R - f = Translate(SqrNormL2(R(1)), -b) - f2 = LeastSquares(A, b) + f_autodiff = ProximalAlgorithms.AutoDifferentiable(x -> (norm(x - b)^2)/2, ZygoteBackend()) + fA_autodiff = ProximalAlgorithms.AutoDifferentiable(x -> (norm(A*x - b)^2)/2, ZygoteBackend()) + fA_prox = LeastSquares(A, b) g = NormL1(lam) Lf = opnorm(A)^2 @@ -34,7 +37,7 @@ x0 = zeros(T, n) solver = ProximalAlgorithms.ForwardBackward(tol = TOL, verbose = true) - x, it = solver(x0 = x0, f = f2, g = g, Lf = Lf) + x, it = solver(x0 = x0, f = fA_autodiff, g = g, Lf = Lf) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 150 @@ -47,7 +50,7 @@ adaptive = true, verbose = true, ) - x, it = solver(x0 = x0, f = f2, g = g) + x, it = solver(x0 = x0, f = fA_autodiff, g = g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 300 @@ -57,7 +60,7 @@ x0 = zeros(T, n) solver = ProximalAlgorithms.FastForwardBackward(tol = 
diff --git a/test/problems/test_verbose.jl b/test/problems/test_verbose.jl
index ff942c1..573d7fe 100644
--- a/test/problems/test_verbose.jl
+++ b/test/problems/test_verbose.jl
@@ -1,8 +1,10 @@
-@testset "Verbose" for T in [Float64]
-    using ProximalOperators
-    using ProximalAlgorithms
-    using LinearAlgebra
+using Zygote
+using AbstractDifferentiation: ZygoteBackend
+using ProximalOperators: LeastSquares, NormL1
+using ProximalAlgorithms
+using LinearAlgebra
+@testset "Verbose" for T in [Float64]

     A = T[
         1.0 -2.0 3.0 -4.0 5.0
         2.0 -1.0 0.0 -1.0 3.0
@@ -18,8 +20,9 @@
     lam = R(0.1) * norm(A' * b, Inf)
     @test typeof(lam) == R

-    f = Translate(SqrNormL2(R(1)), -b)
-    f2 = LeastSquares(A, b)
+    f_autodiff = ProximalAlgorithms.AutoDifferentiable(x -> (norm(x - b)^2)/2, ZygoteBackend())
+    fA_autodiff = ProximalAlgorithms.AutoDifferentiable(x -> (norm(A*x - b)^2)/2, ZygoteBackend())
+    fA_prox = LeastSquares(A, b)
     g = NormL1(lam)

     Lf = opnorm(A)^2
@@ -34,7 +37,7 @@
     x0 = zeros(T, n)
     solver = ProximalAlgorithms.ForwardBackward(tol = TOL, verbose = true)
-    x, it = solver(x0 = x0, f = f2, g = g, Lf = Lf)
+    x, it = solver(x0 = x0, f = fA_autodiff, g = g, Lf = Lf)
     @test eltype(x) == T
     @test norm(x - x_star, Inf) <= TOL
     @test it < 150
@@ -47,7 +50,7 @@
         adaptive = true,
         verbose = true,
     )
-    x, it = solver(x0 = x0, f = f2, g = g)
+    x, it = solver(x0 = x0, f = fA_autodiff, g = g)
     @test eltype(x) == T
     @test norm(x - x_star, Inf) <= TOL
     @test it < 300
@@ -57,7 +60,7 @@
     x0 = zeros(T, n)
     solver = ProximalAlgorithms.FastForwardBackward(tol = TOL, verbose = true)
-    x, it = solver(x0 = x0, f = f2, g = g, Lf = Lf)
+    x, it = solver(x0 = x0, f = fA_autodiff, g = g, Lf = Lf)
     @test eltype(x) == T
     @test norm(x - x_star, Inf) <= TOL
     @test it < 100
@@ -70,7 +73,7 @@
         adaptive = true,
         verbose = true,
     )
-    x, it = solver(x0 = x0, f = f2, g = g)
+    x, it = solver(x0 = x0, f = fA_autodiff, g = g)
     @test eltype(x) == T
     @test norm(x - x_star, Inf) <= TOL
     @test it < 200
@@ -82,7 +85,7 @@
     x0 = zeros(T, n)
     solver = ProximalAlgorithms.ZeroFPR(tol = TOL, verbose = true)
-    x, it = solver(x0 = x0, f = f, A = A, g = g, Lf = Lf)
+    x, it = solver(x0 = x0, f = f_autodiff, A = A, g = g, Lf = Lf)
     @test eltype(x) == T
     @test norm(x - x_star, Inf) <= TOL
     @test it < 20
@@ -91,7 +94,7 @@
     x0 = zeros(T, n)
     solver = ProximalAlgorithms.ZeroFPR(adaptive = true, tol = TOL, verbose = true)
-    x, it = solver(x0 = x0, f = f, A = A, g = g)
+    x, it = solver(x0 = x0, f = f_autodiff, A = A, g = g)
     @test eltype(x) == T
     @test norm(x - x_star, Inf) <= TOL
     @test it < 20
@@ -104,7 +107,7 @@
     x0 = zeros(T, n)
     solver = ProximalAlgorithms.PANOC(tol = TOL, verbose = true)
-    x, it = solver(x0 = x0, f = f, A = A, g = g, Lf = Lf)
+    x, it = solver(x0 = x0, f = f_autodiff, A = A, g = g, Lf = Lf)
     @test eltype(x) == T
     @test norm(x - x_star, Inf) <= TOL
     @test it < 20
@@ -113,7 +116,7 @@
     x0 = zeros(T, n)
     solver = ProximalAlgorithms.PANOC(adaptive = true, tol = TOL, verbose = true)
-    x, it = solver(x0 = x0, f = f, A = A, g = g)
+    x, it = solver(x0 = x0, f = f_autodiff, A = A, g = g)
     @test eltype(x) == T
     @test norm(x - x_star, Inf) <= TOL
     @test it < 20
@@ -126,7 +129,7 @@
     x0 = zeros(T, n)
     solver = ProximalAlgorithms.PANOCplus(tol = TOL, verbose = true)
-    x, it = solver(x0 = x0, f = f, A = A, g = g, Lf = Lf)
+    x, it = solver(x0 = x0, f = f_autodiff, A = A, g = g, Lf = Lf)
     @test eltype(x) == T
     @test norm(x - x_star, Inf) <= TOL
     @test it < 20
@@ -135,7 +138,7 @@
     x0 = zeros(T, n)
     solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL, verbose = true)
-    x, it = solver(x0 = x0, f = f, A = A, g = g)
+    x, it = solver(x0 = x0, f = f_autodiff, A = A, g = g)
     @test eltype(x) == T
     @test norm(x - x_star, Inf) <= TOL
     @test it < 20
@@ -152,7 +155,7 @@
         tol = TOL,
         verbose = true,
     )
-    y, it = solver(x0 = x0, f = f2, g = g)
+    y, it = solver(x0 = x0, f = fA_prox, g = g)
     @test eltype(y) == T
     @test norm(y - x_star, Inf) <= TOL
     @test it < 30
diff --git a/test/runtests.jl b/test/runtests.jl
index 6532278..8380741 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,14 +1,24 @@
 using Test
 using Aqua
+using AbstractDifferentiation
 using ProximalAlgorithms

+struct Quadratic{M, V}
+    Q::M
+    q::V
+end
+
+(f::Quadratic)(x) = dot(x, f.Q * x) / 2 + dot(f.q, x)
+
+function ProximalAlgorithms.value_and_gradient_closure(f::Quadratic, x)
+    grad = f.Q * x + f.q
+    return (dot(grad, x) + dot(f.q, x)) / 2, () -> grad
+end
+
 @testset "Aqua" begin
     Aqua.test_all(ProximalAlgorithms; ambiguities=false)
 end

-include("definitions/arraypartition.jl")
-include("definitions/compose.jl")
-
 include("utilities/test_ad.jl")
 include("utilities/test_iteration_tools.jl")
 include("utilities/test_fb_tools.jl")
@@ -22,8 +32,6 @@ include("problems/test_equivalence.jl")
 include("problems/test_elasticnet.jl")
 include("problems/test_lasso_small.jl")
 include("problems/test_lasso_small_strongly_convex.jl")
-include("problems/test_lasso_small_v_split.jl")
-include("problems/test_lasso_small_h_split.jl")
 include("problems/test_linear_programs.jl")
 include("problems/test_sparse_logistic_small.jl")
 include("problems/test_nonconvex_qp.jl")
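runtests.jl now defines a small `Quadratic` helper with a hand-written `value_and_gradient_closure` method, which is the "bring your own gradients" route: any callable with such a method can serve as the smooth term, with no AD backend involved. A sketch of the same idea for a hypothetical least-squares loss (the type name and data below are made up for illustration):

```julia
using LinearAlgebra
using ProximalAlgorithms
using ProximalOperators: NormL1

# hypothetical smooth term with a hand-written gradient, no AD involved
struct LeastSquaresLoss{M,V}
    A::M
    b::V
end

(f::LeastSquaresLoss)(x) = norm(f.A * x - f.b)^2 / 2

function ProximalAlgorithms.value_and_gradient_closure(f::LeastSquaresLoss, x)
    res = f.A * x - f.b
    return norm(res)^2 / 2, () -> f.A' * res
end

A = [1.0 2.0; 3.0 4.0; 5.0 6.0]  # toy data, not from the test suite
b = [1.0, 2.0, 3.0]
f = LeastSquaresLoss(A, b)
g = NormL1(0.1)

solver = ProximalAlgorithms.FastForwardBackward(tol = 1e-6)
x, it = solver(x0 = zeros(2), f = f, g = g, Lf = opnorm(A)^2)
```

Note that terms accessed only through their proximal mapping, such as `LeastSquares(A, b)` kept as `fA_prox` in test_verbose.jl, still go through the ProximalCore prox path and need no gradient at all.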
diff --git a/test/utilities/test_ad.jl b/test/utilities/test_ad.jl
index a4fa27f..25c15b3 100644
--- a/test/utilities/test_ad.jl
+++ b/test/utilities/test_ad.jl
@@ -2,8 +2,18 @@ using Test
 using LinearAlgebra
 using ProximalOperators: NormL1
 using ProximalAlgorithms
+using Zygote
+using ReverseDiff
+using AbstractDifferentiation: ZygoteBackend, ReverseDiffBackend
+
+@testset "Autodiff backend ($B on $T)" for (T, B) in Iterators.product(
+    [Float32, Float64, ComplexF32, ComplexF64],
+    [ZygoteBackend, ReverseDiffBackend],
+)
+    if T <: Complex && B == ReverseDiffBackend
+        continue
+    end
-@testset "Autodiff ($T)" for T in [Float32, Float64, ComplexF32, ComplexF64]
     R = real(T)
     A = T[
         1.0 -2.0 3.0 -4.0 5.0
@@ -12,30 +22,23 @@ using ProximalAlgorithms
         -1.0 -1.0 -1.0 1.0 3.0
     ]
     b = T[1.0, 2.0, 3.0, 4.0]
-    f = ProximalAlgorithms.ZygoteFunction(
-        x -> R(1/2) * norm(A * x - b, 2)^2
-    )
+    f = ProximalAlgorithms.AutoDifferentiable(x -> R(1/2) * norm(A * x - b, 2)^2, B())
     Lf = opnorm(A)^2
     m, n = size(A)

-    @testset "Gradient" begin
-        x = randn(T, n)
-        gradfx, fx = ProximalAlgorithms.gradient(f, x)
-        @test eltype(gradfx) == T
-        @test typeof(fx) == R
-        @test gradfx ≈ A' * (A * x - b)
-    end
+    x0 = zeros(T, n)

-    @testset "Algorithms" begin
-        lam = R(0.1) * norm(A' * b, Inf)
-        @test typeof(lam) == R
-        g = NormL1(lam)
-        x_star = T[-3.877278911564627e-01, 0, 0, 2.174149659863943e-02, 6.168435374149660e-01]
-        TOL = R(1e-4)
-        solver = ProximalAlgorithms.FastForwardBackward(tol = TOL)
-        x, it = solver(x0 = zeros(T, n), f = f, g = g, Lf = Lf)
-        @test eltype(x) == T
-        @test norm(x - x_star, Inf) <= TOL
-        @test it < 100
-    end
+    f_x0, cl = ProximalAlgorithms.value_and_gradient_closure(f, x0)
+    grad_f_x0 = @inferred cl()
+
+    lam = R(0.1) * norm(A' * b, Inf)
+    @test typeof(lam) == R
+    g = NormL1(lam)
+    x_star = T[-3.877278911564627e-01, 0, 0, 2.174149659863943e-02, 6.168435374149660e-01]
+    TOL = R(1e-4)
+    solver = ProximalAlgorithms.FastForwardBackward(tol = TOL)
+    x, it = solver(x0 = x0, f = f, g = g, Lf = Lf)
+    @test eltype(x) == T
+    @test norm(x - x_star, Inf) <= TOL
+    @test it < 100
 end
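The reworked test_ad.jl sweeps over AD backends instead of hard-coding Zygote; switching backends only changes the second argument of `AutoDifferentiable`. A sketch of that switch (the smooth term `h` below is a toy function, not taken from the tests):

```julia
using Zygote, ReverseDiff
using AbstractDifferentiation: ZygoteBackend, ReverseDiffBackend
using ProximalAlgorithms

h(x) = sum(abs2, x) / 2  # toy smooth term; its gradient at x is x itself

for backend in (ZygoteBackend(), ReverseDiffBackend())
    f = ProximalAlgorithms.AutoDifferentiable(h, backend)
    fx, cl = ProximalAlgorithms.value_and_gradient_closure(f, ones(3))
    @assert fx ≈ 1.5 && cl() ≈ ones(3)
end
```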
diff --git a/test/utilities/test_fb_tools.jl b/test/utilities/test_fb_tools.jl
index 75a3d50..8205e08 100644
--- a/test/utilities/test_fb_tools.jl
+++ b/test/utilities/test_fb_tools.jl
@@ -1,39 +1,41 @@
 using Test
 using LinearAlgebra
-using ProximalAlgorithms: lower_bound_smoothness_constant, backtrack_stepsize!
-using ProximalOperators: Quadratic, Zero
+using ProximalCore: Zero
+using ProximalAlgorithms
+using AbstractDifferentiation

 @testset "Lipschitz constant estimation" for R in [Float32, Float64]

-sv = R[0.01, 1.0, 1.0, 1.0, 100.0]
-n = length(sv)
-U, _ = qr(randn(n, n))
-Q = U * Diagonal(sv) * U'
-q = randn(n)
+    sv = R[0.01, 1.0, 1.0, 1.0, 100.0]
+    n = length(sv)
+    U, _ = qr(randn(R, n, n))
+    Q = U * Diagonal(sv) * U'
+    q = randn(R, n)

-f = Quadratic(Q, q)
-Lf = maximum(sv)
-g = Zero()
+    f = Quadratic(Q, q)
+    Lf = maximum(sv)
+    g = Zero()

-for _ in 1:100
-    x = randn(n)
-    Lest = @inferred lower_bound_smoothness_constant(f, I, x)
-    @test Lest <= Lf
-end
-
-x = randn(n)
-Lest = @inferred lower_bound_smoothness_constant(f, I, x)
-alpha = R(0.5)
-gamma_init = 10 / Lest
-gamma = gamma_init
+    for _ in 1:100
+        x = randn(R, n)
+        Lest = @inferred ProximalAlgorithms.lower_bound_smoothness_constant(f, I, x)
+        @test typeof(Lest) == R
+        @test Lest <= Lf
+    end

-for _ in 1:100
     x = randn(n)
-    new_gamma, = @inferred backtrack_stepsize!(gamma, f, I, g, x, alpha=alpha)
-    @test new_gamma <= gamma
-    gamma = new_gamma
-end
-
-@test gamma < gamma_init
+    Lest = @inferred ProximalAlgorithms.lower_bound_smoothness_constant(f, I, x)
+    alpha = R(0.5)
+    gamma_init = 10 / Lest
+    gamma = gamma_init
+
+    for _ in 1:100
+        x = randn(n)
+        new_gamma, = @inferred ProximalAlgorithms.backtrack_stepsize!(gamma, f, I, g, x, alpha=alpha)
+        @test new_gamma <= gamma
+        gamma = new_gamma
+    end
+
+    @test gamma < gamma_init
 end
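The same objects also drive the step-size machinery exercised in test_fb_tools.jl: `lower_bound_smoothness_constant` probes the gradient to bound the Lipschitz constant of the smooth term, and `backtrack_stepsize!` shrinks a trial step until a local descent condition holds, as in the loop above. A sketch with an AD-wrapped least-squares term (toy data, not from the tests):

```julia
using LinearAlgebra
using Zygote
using AbstractDifferentiation: ZygoteBackend
using ProximalCore: Zero
using ProximalAlgorithms

A = [1.0 2.0; 3.0 4.0; 5.0 6.0]  # toy data, not from the test suite
b = [1.0, 2.0, 3.0]
f = ProximalAlgorithms.AutoDifferentiable(x -> norm(A * x - b)^2 / 2, ZygoteBackend())

x = randn(2)
# lower bound on the Lipschitz constant of the gradient (the true value is opnorm(A)^2)
Lest = ProximalAlgorithms.lower_bound_smoothness_constant(f, I, x)
# one backtracking pass, starting from a deliberately large stepsize
gamma, = ProximalAlgorithms.backtrack_stepsize!(10 / Lest, f, I, Zero(), x, alpha = 0.5)
```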