diff --git a/docs/Project.toml b/docs/Project.toml index ed025f5a..5f8d7c4c 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,5 +1,16 @@ [deps] +ADNLPModels = "54578032-b7ea-4c30-94aa-7cbd1cce6c9a" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244" +LLSModels = "39f5bc3e-5160-4bf8-ac48-504fd2534d24" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +NLPModelsModifiers = "e01155f1-5c6f-4375-a9d8-616dd036575f" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +ProximalOperators = "a725b495-10eb-56fe-b38b-717eba820537" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +RegularizedProblems = "ea076b23-609f-44d2-bb12-a4ae45328278" +ShiftedProximalOperators = "d4fd37fa-580c-4e43-9b30-361c21aae263" [compat] -Documenter = "~0.25" +Documenter = "1" +DocumenterCitations = "1.2" diff --git a/docs/build/assets/style.css b/docs/build/assets/style.css new file mode 100644 index 00000000..76026269 --- /dev/null +++ b/docs/build/assets/style.css @@ -0,0 +1,20 @@ +.mi, .mo, .mn { + color: #317293; +} + +a { + color: #3091d1; +} + +a:visited { + color: #3091d1; +} + +a:hover { + color: #ff5722; +} + +nav.toc .logo { + max-width: 256px; + max-height: 256px; +} \ No newline at end of file diff --git a/docs/make.jl b/docs/make.jl index fd1dc2de..58bed37c 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,16 +1,30 @@ -using Documenter, RegularizedOptimization +using Documenter, DocumenterCitations + +using RegularizedOptimization + +bib = CitationBibliography(joinpath(@__DIR__, "references.bib")) makedocs( modules = [RegularizedOptimization], doctest = true, # linkcheck = true, - strict = true, + warnonly = false, format = Documenter.HTML( assets = ["assets/style.css"], prettyurls = get(ENV, "CI", nothing) == "true", ), sitename = "RegularizedOptimization.jl", - pages = Any["Home" => "index.md", "Tutorial" => "tutorial.md", "Reference" => "reference.md"], + pages = [ + "Home" => "index.md", + "Algorithms" => "algorithms.md", + "Examples" => [ + joinpath("examples", "basic.md"), + joinpath("examples", "ls.md"), + ], + "Reference" => "reference.md", + "Bibliography" => "bibliography.md" + ], + plugins = [bib], ) deploydocs( diff --git a/docs/references.bib b/docs/references.bib new file mode 100644 index 00000000..2baf1808 --- /dev/null +++ b/docs/references.bib @@ -0,0 +1,56 @@ +@Article{aravkin-baraldi-orban-2022, + author = {Aravkin, Aleksandr Y. and Baraldi, Robert and Orban, Dominique}, + title = {A Proximal Quasi-Newton Trust-Region Method for Nonsmooth Regularized Optimization}, + journal = {SIAM Journal on Optimization}, + volume = {32}, + number = {2}, + pages = {900-929}, + year = {2022}, + doi = {10.1137/21M1409536} +} + +@Article{aravkin-baraldi-orban-2024, + author = {Aravkin, Aleksandr Y. 
and Baraldi, Robert and Orban, Dominique},
+  title   = {A Levenberg–Marquardt Method for Nonsmooth Regularized Least Squares},
+  journal = {SIAM Journal on Scientific Computing},
+  volume  = {46},
+  number  = {4},
+  pages   = {A2557-A2581},
+  year    = {2024},
+  doi     = {10.1137/22M1538971},
+}
+
+@Article{leconte-orban-2025,
+  author  = {Leconte, Geoffroy and Orban, Dominique},
+  title   = {The indefinite proximal gradient method},
+  journal = {Computational Optimization and Applications},
+  volume  = {91},
+  number  = {2},
+  pages   = {861-903},
+  year    = {2025},
+  doi     = {10.1007/s10589-024-00604-5}
+}
+
+@TechReport{diouane-gollier-orban-2024,
+  author      = {Diouane, Youssef and Gollier, Maxence and Orban, Dominique},
+  title       = {A nonsmooth exact penalty method for equality-constrained optimization: complexity and implementation},
+  institution = {Groupe d’études et de recherche en analyse des décisions},
+  year        = {2024},
+  type        = {Les Cahiers du GERAD},
+  number      = {G-2024-65},
+  address     = {Montreal, Canada},
+  doi         = {10.48550/arxiv.2103.15993},
+  url         = {https://www.gerad.ca/fr/papers/G-2024-65},
+}
+
+@TechReport{diouane-habiboullah-orban-2024,
+  author      = {Diouane, Youssef and Laghdaf Habiboullah, Mohamed and Orban, Dominique},
+  title       = {A proximal modified quasi-Newton method for nonsmooth regularized optimization},
+  institution = {Groupe d’études et de recherche en analyse des décisions},
+  year        = {2024},
+  type        = {Les Cahiers du GERAD},
+  number      = {G-2024-64},
+  address     = {Montreal, Canada},
+  doi         = {10.48550/arxiv.2409.19428},
+  url         = {https://www.gerad.ca/fr/papers/G-2024-64},
+}
\ No newline at end of file
diff --git a/docs/src/algorithms.md b/docs/src/algorithms.md
new file mode 100644
index 00000000..3a31a29d
--- /dev/null
+++ b/docs/src/algorithms.md
@@ -0,0 +1,66 @@
+# [Solvers](@id algorithms)
+
+## General case
+The solvers in this package are based upon the approach of [aravkin-baraldi-orban-2022](@cite).
+Suppose we are given the general regularized problem
+```math
+\underset{x \in \mathbb{R}^n}{\text{minimize}} \quad f(x) + h(x),
+```
+where $f : \mathbb{R}^n \to \mathbb{R}$ is continuously differentiable and $h : \mathbb{R}^n \to \mathbb{R} \cup \{\infty\}$ is lower semi-continuous.
+Instead of solving this problem directly, which is often impossible, we repeatedly solve simplified versions of it until we reach a stationary point.
+To do so, given an iterate $x_0 \in \mathbb{R}^n$, we wish to compute a step $s_0 \in \mathbb{R}^n$ and improve our iterate with $x_1 := x_0 + s_0$.
+We approximate the functions $f$ and $h$ around $x_0$ with simpler functions (models), denoted $\varphi(\cdot; x_0)$ and $\psi(\cdot; x_0)$ respectively, so that
+```math
+\varphi(s; x_0) \approx f(x_0 + s) \quad \text{and} \quad \psi(s; x_0) \approx h(x_0 + s).
+```
+We then wish to compute the step as
+```math
+s_0 \in \underset{s \in \mathbb{R}^n}{\argmin} \ \varphi(s; x_0) + \psi(s; x_0).
+```
+To ensure convergence and to handle the potential nonconvexity of the objective, we either add a trust-region constraint,
+```math
+s_0 \in \underset{s \in \mathbb{R}^n}{\argmin} \ \varphi(s; x_0) + \psi(s; x_0) \quad \text{subject to} \ \|s\| \leq \Delta,
+```
+or a quadratic regularization,
+```math
+s_0 \in \underset{s \in \mathbb{R}^n}{\argmin} \ \varphi(s; x_0) + \psi(s; x_0) + \tfrac{1}{2} \sigma \|s\|^2_2.
+```
+Solvers that work with a trust region are [`TR`](@ref TR) and [`TRDH`](@ref TRDH), and those that work with a quadratic regularization are [`R2`](@ref R2), [`R2N`](@ref R2N) and [`R2DH`](@ref R2DH).
+
+The models of the smooth part $f$ in this package are always quadratic models of the form
+```math
+\varphi(s; x_0) = f(x_0) + \nabla f(x_0)^T s + \tfrac{1}{2} s^T H(x_0) s,
+```
+where $H(x_0)$ is a symmetric matrix that can be $0$, the Hessian of $f$ (if it exists), or a quasi-Newton approximation.
+Some solvers require a specific structure for $H$.
+
+The following table gives an overview of the available solvers in the general case.
+
+Solver | Quadratic regularization | Trust region | Quadratic term $H$ in $\varphi$ | Reference
+----------|--------------------------|--------------|---------------|----------
+[`R2`](@ref R2) | Yes | No | $H = 0$ | [aravkin-baraldi-orban-2022; Algorithm 6.1](@cite)
+[`R2N`](@ref R2N) | Yes | No | Any symmetric | [diouane-habiboullah-orban-2024; Algorithm 1](@cite)
+[`R2DH`](@ref R2DH) | Yes | No | Any diagonal | [diouane-habiboullah-orban-2024; Algorithm 1](@cite)
+[`TR`](@ref TR) | No | Yes | Any symmetric | [aravkin-baraldi-orban-2022; Algorithm 3.1](@cite)
+[`TRDH`](@ref TRDH) | No | Yes | Any diagonal | [leconte-orban-2025; Algorithm 5.1](@cite)
+
+## Nonlinear least-squares
+This package provides two solvers, [`LM`](@ref LM) and [`LMTR`](@ref LMTR), specialized for regularized nonlinear least squares, i.e., problems of the form
+```math
+\underset{x \in \mathbb{R}^n}{\text{minimize}} \quad \tfrac{1}{2}\|F(x)\|_2^2 + h(x),
+```
+where $F : \mathbb{R}^n \to \mathbb{R}^m$ is continuously differentiable and $h : \mathbb{R}^n \to \mathbb{R} \cup \{\infty\}$ is lower semi-continuous.
+In that case, the model $\varphi$ is defined as
+```math
+\varphi(s; x) = \tfrac{1}{2}\|F(x) + J(x)s\|_2^2,
+```
+where $J(x)$ is the Jacobian of $F$ at $x$.
+As with the solvers of the previous section, we either add a quadratic regularization to the model ([`LM`](@ref LM)) or a trust-region constraint ([`LMTR`](@ref LMTR)).
+These solvers are described in [aravkin-baraldi-orban-2024](@cite).
+
+## Constrained optimization
+For constrained regularized optimization,
+```math
+\underset{x \in \mathbb{R}^n}{\text{minimize}} \quad f(x) + h(x) \quad \text{subject to} \ l \leq x \leq u \ \text{and} \ c(x) = 0,
+```
+an augmented Lagrangian method, [`AL`](@ref AL), is provided.
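+
+## Calling a solver
+
+All of the solvers above share the same interface: they take a `RegularizedNLPModel`, which pairs the smooth part with the regularizer, and return a `GenericExecutionStats`.
+The snippet below is a minimal sketch, assuming `nlp` is an `AbstractNLPModel` for $f$ (built, for instance, with ADNLPModels.jl) and `h` is a regularizer such as `NormL1(1.0)` from ProximalOperators.jl.
+
+```julia
+using RegularizedOptimization, RegularizedProblems
+
+# Pair the smooth model with the regularizer
+reg_nlp = RegularizedNLPModel(nlp, h)
+
+# Call a trust-region solver; R2(reg_nlp) is the quadratic-regularization analogue
+stats = TR(reg_nlp)
+
+stats.solution  # final iterate
+stats.iter      # number of iterations
+```
+
+See the examples for complete, runnable versions of this workflow.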
diff --git a/docs/src/assets/logo.png b/docs/src/assets/logo.png
new file mode 100644
index 00000000..f1947e99
Binary files /dev/null and b/docs/src/assets/logo.png differ
diff --git a/docs/src/assets/style.css b/docs/src/assets/style.css
index 7f1fab0d..76026269 100644
--- a/docs/src/assets/style.css
+++ b/docs/src/assets/style.css
@@ -1,10 +1,3 @@
-#documenter .docs-sidebar,
-html.theme--documenter-dark #documenter .docs-sidebar {
-  border-right: 4px solid #640000;
-  background-color: #8c1515;
-  color: #fff;
-}
-
 .mi, .mo, .mn {
   color: #317293;
 }
diff --git a/docs/src/bibliography.md b/docs/src/bibliography.md
new file mode 100644
index 00000000..0b1be7e1
--- /dev/null
+++ b/docs/src/bibliography.md
@@ -0,0 +1,4 @@
+# Bibliography
+
+```@bibliography
+```
\ No newline at end of file
diff --git a/docs/src/examples/basic.md b/docs/src/examples/basic.md
new file mode 100644
index 00000000..f16f9369
--- /dev/null
+++ b/docs/src/examples/basic.md
@@ -0,0 +1,103 @@
+# A regularized optimization problem
+
+In this tutorial, we will show how to model and solve the nonconvex nonsmooth optimization problem
+```math
+  \min_{x \in \mathbb{R}^2} (1 - x_1)^2 + 100(x_2 - x_1^2)^2 + |x_1| + |x_2|,
+```
+which can be seen as an $\ell_1$ regularization of the Rosenbrock function.
+It can be shown that the solution to the problem is
+```math
+  x^* = \begin{pmatrix}
+  0.25\\
+  0.0575
+  \end{pmatrix}.
+```
+
+## Modelling the problem
+We first formulate the objective function as the sum of a smooth function $f$ and a nonsmooth regularizer $h$:
+```math
+  (1 - x_1)^2 + 100(x_2 - x_1^2)^2 + |x_1| + |x_2| = f(x_1, x_2) + h(x_1, x_2),
+```
+where
+```math
+\begin{align*}
+f(x_1, x_2) &:= (1 - x_1)^2 + 100(x_2 - x_1^2)^2,\\
+h(x_1, x_2) &:= \|x\|_1.
+\end{align*}
+```
+To model $f$, we are going to use [ADNLPModels.jl](https://github.com/JuliaSmoothOptimizers/ADNLPModels.jl).
+For the nonsmooth regularizer, we use [ProximalOperators.jl](https://github.com/JuliaFirstOrder/ProximalOperators.jl).
+We then wrap the smooth function and the regularizer in a `RegularizedNLPModel`.
+
+```@example basic
+using ADNLPModels
+using ProximalOperators
+using RegularizedProblems
+
+# Model the smooth function f
+f_fun = x -> (1 - x[1])^2 + 100*(x[2] - x[1]^2)^2
+
+# Choose a starting point for the optimization process; for the sake of this example, we take
+x0 = [-1.0, 2.0]
+
+# Get an NLPModel corresponding to the smooth function f
+f_model = ADNLPModel(f_fun, x0, name = "AD model of f")
+
+# Get the regularizer from ProximalOperators
+h = NormL1(1.0)
+
+# Wrap into a RegularizedNLPModel
+regularized_pb = RegularizedNLPModel(f_model, h)
+```
+
+## Solving the problem
+We can now choose one of the solvers presented [here](@ref algorithms) to solve the problem defined above.
+Please refer to the other sections of this documentation to make the best choice for your particular problem.
+Depending on the problem structure and on the user's requirements, some solvers are more appropriate than others.
+The following gives a quick overview of the possible choices.
+
+Suppose for example that we don't want to use a quasi-Newton approach, and that we don't have access to the Hessian of $f$ or don't want to incur the cost of computing it.
+In this case, the most appropriate solver is `R2`.
+For this example, we also set the stopping tolerances through the keyword arguments `atol` and `rtol`, which are common to all solvers.
+
+```@example basic
+using RegularizedOptimization
+
+out = R2(regularized_pb, verbose = 100, atol = 1e-6, rtol = 1e-6)
+println("R2 converged after $(out.iter) iterations to the solution x = $(out.solution)")
+```
+
+Now, we can actually use second-order information on $f$.
+To do so, we are going to use `TR`, a trust-region solver that can exploit second-order information.
+```@example basic
+
+out = TR(regularized_pb, verbose = 100, atol = 1e-6, rtol = 1e-6)
+println("TR converged after $(out.iter) iterations to the solution x = $(out.solution)")
+```
+
+Suppose that, for some reason, we cannot compute the Hessian.
+In this case, we can switch to a quasi-Newton approximation; this can be done with [NLPModelsModifiers.jl](https://github.com/JuliaSmoothOptimizers/NLPModelsModifiers.jl).
+We could choose to use `TR` again, but for the sake of this tutorial we run it with `R2N`.
+```@example basic
+using NLPModelsModifiers
+
+# Switch the model of the smooth function to a quasi-Newton approximation
+f_model_lsr1 = LSR1Model(f_model)
+regularized_pb_lsr1 = RegularizedNLPModel(f_model_lsr1, h)
+
+# Solve with R2N
+out = R2N(regularized_pb_lsr1, verbose = 100, atol = 1e-6, rtol = 1e-6)
+println("R2N converged after $(out.iter) iterations to the solution x = $(out.solution)")
+```
+
+Finally, `TRDH` and `R2DH` are specialized for diagonal quasi-Newton approximations, and should be used instead of `TR` and `R2N`, respectively.
+```@example basic
+
+f_model_sg = SpectralGradientModel(f_model)
+regularized_pb_sg = RegularizedNLPModel(f_model_sg, h)
+
+# Solve with R2DH
+out = R2DH(regularized_pb_sg, verbose = 100, atol = 1e-6, rtol = 1e-6)
+println("R2DH converged after $(out.iter) iterations to the solution x = $(out.solution)")
+```
diff --git a/docs/src/examples/ls.md b/docs/src/examples/ls.md
new file mode 100644
index 00000000..cb0ecf49
--- /dev/null
+++ b/docs/src/examples/ls.md
@@ -0,0 +1,109 @@
+# A regularized nonlinear least-squares problem
+
+In this tutorial, we will show how to model and solve the nonconvex nonsmooth least-squares problem
+```math
+  \min_{x \in \mathbb{R}^2} \tfrac{1}{2} \sum_{i=1}^m \big(y_i - x_1 e^{x_2 t_i}\big)^2 + \lambda \|x\|_0.
+```
+This problem models the fitting of an exponential curve to noisy data.
+
+## Modelling the problem
+We first formulate the objective function as the sum of a smooth function $f$ and a nonsmooth regularizer $h$:
+```math
+  \tfrac{1}{2} \sum_{i=1}^m \big(y_i - x_1 e^{x_2 t_i}\big)^2 + \lambda \|x\|_0 = f(x) + h(x),
+```
+where
+```math
+\begin{align*}
+f(x) &:= \tfrac{1}{2} \sum_{i=1}^m \big(y_i - x_1 e^{x_2 t_i}\big)^2,\\
+h(x) &:= \lambda\|x\|_0.
+\end{align*}
+```
+
+To model $f$, we are going to use [ADNLPModels.jl](https://github.com/JuliaSmoothOptimizers/ADNLPModels.jl).
+For the nonsmooth regularizer, we use [ProximalOperators.jl](https://github.com/JuliaFirstOrder/ProximalOperators.jl).
+We then wrap the smooth function and the regularizer in a `RegularizedNLPModel`.
+
+```@example ls
+using ADNLPModels
+using ProximalOperators
+using Random
+using RegularizedProblems
+
+Random.seed!(0)
+
+# Generate synthetic nonlinear least-squares data
+m = 100
+t = range(0, 1, length=m)
+a_true, b_true = 2.0, -1.0
+y = [a_true * exp(b_true * ti) + 0.1*randn() for ti in t]
+
+# Starting point
+x0 = [1.0, 0.0] # [a, b]
+
+# Define nonlinear residuals
+function F(x)
+  a, b = x
+  return [yi - a*exp(b*ti) for (ti, yi) in zip(t, y)]
+end
+
+# Build ADNLSModel
+f_model = ADNLSModel(F, x0, m, name = "nonlinear LS model of f")
+
+# Get the regularizer from ProximalOperators
+λ = 0.01
+h = NormL0(λ)
+
+# Wrap into a RegularizedNLPModel
+regularized_pb = RegularizedNLPModel(f_model, h)
+```
+
+## Solving the problem
+We can now choose one of the solvers presented [here](@ref algorithms) to solve the problem we defined above.
+In the case of least-squares problems, it is usually more appropriate to choose `LM` or `LMTR`.
+
+```@example ls
+using RegularizedOptimization
+
+# LM is a quadratic regularization method
+out = LM(regularized_pb, verbose = 1, atol = 1e-4)
+println("LM converged after $(out.iter) iterations.")
+```
+
+We can visualize the solution with Plots.jl:
+```@example ls
+using Plots
+
+# Extract estimated parameters
+a_est, b_est = out.solution
+
+# True curve
+y_true = [a_true * exp(b_true * ti) for ti in t]
+
+# Estimated curve
+y_est = [a_est * exp(b_est * ti) for ti in t]
+
+# Plot
+scatter(t, y, label="Noisy data", legend=:bottomleft)
+plot!(t, y_true, label="True model", lw=2)
+plot!(t, y_est, label="Fitted model", lw=2, ls=:dash)
+```
+
+```@example ls
+# We can choose LMTR instead, which is a trust-region method
+out = LMTR(regularized_pb, verbose = 1, atol = 1e-4)
+println("LMTR converged after $(out.iter) iterations.")
+```
+
+```@example ls
+# Extract estimated parameters
+a_est, b_est = out.solution
+
+# Estimated curve
+y_est = [a_est * exp(b_est * ti) for ti in t]
+
+# Plot
+scatter(t, y, label="Noisy data", legend=:bottomleft)
+plot!(t, y_true, label="True model", lw=2)
+plot!(t, y_est, label="Fitted model", lw=2, ls=:dash)
+```
\ No newline at end of file
diff --git a/docs/src/index.md b/docs/src/index.md
index 4b393b13..10a63c6d 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -1 +1,53 @@
 # RegularizedOptimization.jl
+
+This package implements a family of algorithms to solve nonsmooth optimization problems of the form
+
+```math
+\underset{x \in \mathbb{R}^n}{\text{minimize}} \quad f(x) + h(x), \quad \text{subject to} \ c(x) = 0,
+```
+
+where $f: \mathbb{R}^n \to \mathbb{R}$ and $c: \mathbb{R}^n \to \mathbb{R}^m$ are continuously differentiable, and $h: \mathbb{R}^n \to \mathbb{R} \cup \{+\infty\}$ is lower semi-continuous.
+The nonsmooth objective $h$ can be a *regularizer*, such as a sparsity-inducing penalty, can model simple constraints, such as $x$ belonging to a simple convex set, or can be a combination of both.
+All of $f$, $h$ and $c$ can be nonconvex.
+
+All solvers implemented in this package are JuliaSmoothOptimizers-compliant.
+They take a [`RegularizedNLPModel`](https://jso.dev/RegularizedProblems.jl/dev/reference#RegularizedProblems.RegularizedNLPModel) as input and return a [`GenericExecutionStats`](https://jso.dev/SolverCore.jl/stable/reference/#SolverCore.GenericExecutionStats).
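+
+For instance, a minimal sketch of such a call, assuming `nlp` is an `AbstractNLPModel` and `h` a regularizer such as `NormL1(1.0)`, looks like:
+
+```julia
+using RegularizedOptimization, RegularizedProblems
+
+rnlp = RegularizedNLPModel(nlp, h)  # smooth part f plus regularizer h
+stats = R2(rnlp)                    # returns a GenericExecutionStats
+stats.status, stats.solution, stats.objective
+```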
+
+A [`RegularizedNLPModel`](https://jso.dev/RegularizedProblems.jl/stable/reference#RegularizedProblems.RegularizedNLPModel) contains:
+
+- a smooth component `f` represented as an [`AbstractNLPModel`](https://github.com/JuliaSmoothOptimizers/NLPModels.jl),
+- a nonsmooth regularizer `h`.
+
+For the smooth component `f`, we refer to [jso.dev](https://jso.dev) for tutorials on the `NLPModels` API. This framework allows the use of models from
+
+- AMPL ([AmplNLReader.jl](https://github.com/JuliaSmoothOptimizers/AmplNLReader.jl)),
+- CUTEst ([CUTEst.jl](https://github.com/JuliaSmoothOptimizers/CUTEst.jl)),
+- JuMP ([NLPModelsJuMP.jl](https://github.com/JuliaSmoothOptimizers/NLPModelsJuMP.jl)),
+- PDE-constrained problems ([PDENLPModels.jl](https://github.com/JuliaSmoothOptimizers/PDENLPModels.jl)),
+- models defined with automatic differentiation ([ADNLPModels.jl](https://github.com/JuliaSmoothOptimizers/ADNLPModels.jl)).
+
+We refer to [ManualNLPModels.jl](https://github.com/JuliaSmoothOptimizers/ManualNLPModels.jl) for users interested in defining their own model.
+
+For the nonsmooth component `h`, we refer to [ShiftedProximalOperators.jl](https://github.com/JuliaSmoothOptimizers/ShiftedProximalOperators.jl).
+Regularizers are available in
+
+- [ProximalOperators.jl](https://github.com/JuliaFirstOrder/ProximalOperators.jl)
+- [ShiftedProximalOperators.jl](https://github.com/JuliaSmoothOptimizers/ShiftedProximalOperators.jl)
+
+---
+
+## How to Install
+
+RegularizedOptimization can be installed through the Julia package manager:
+
+```julia
+julia> ]
+pkg> add https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl
+```
+
+---
+
+## Bug reports and discussions
+
+If you think you found a bug, please open an [issue](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl/issues).
+Focused suggestions and requests can also be opened as issues. Before opening a pull request, we recommend starting an issue or a discussion first.
+
+For general questions not suited for a bug report, feel free to start a discussion [here](https://github.com/JuliaSmoothOptimizers/Organization/discussions).
+This forum is for questions and discussions about any of the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) packages.
\ No newline at end of file
diff --git a/docs/src/reference.md b/docs/src/reference.md
index 84a547b1..11225b20 100644
--- a/docs/src/reference.md
+++ b/docs/src/reference.md
@@ -1,11 +1,5 @@
 # Reference
 
-## Contents
-
-```@contents
-Pages = ["reference.md"]
-```
-
 ## Index
 
 ```@index
diff --git a/docs/src/tutorial.md b/docs/src/tutorial.md
deleted file mode 100644
index f681361d..00000000
--- a/docs/src/tutorial.md
+++ /dev/null
@@ -1 +0,0 @@
-# RegularizedOptimization Tutorial
diff --git a/src/R2_alg.jl b/src/R2_alg.jl
index b215101b..639af19f 100644
--- a/src/R2_alg.jl
+++ b/src/R2_alg.jl
@@ -354,7 +354,7 @@ function SolverCore.solve!(
   hk = @views h(xk[selected])
   if hk == Inf
     verbose > 0 && @info "R2: finding initial guess where nonsmooth term is finite"
-    prox!(xk, h, xk, one(eltype(x0)))
+    prox!(xk, h, xk, one(eltype(xk)))
     hk = @views h(xk[selected])
     hk < Inf || error("prox computation must be erroneous")
     verbose > 0 && @debug "R2: found point where h has value" hk