diff --git a/.github/workflows/draft-pdf.yml b/.github/workflows/draft-pdf.yml
index 1f6fc01c..5bbf32d0 100644
--- a/.github/workflows/draft-pdf.yml
+++ b/.github/workflows/draft-pdf.yml
@@ -28,25 +28,4 @@ jobs:
           # This is the output path where Pandoc will write the compiled
           # PDF. Note, this should be the same directory as the input
           # paper.md
-          path: paper/paper.pdf
-      - name: Create release
-        if: github.event_name == 'push'
-        uses: rymndhng/release-on-push-action@master
-        id: release
-        with:
-          bump_version_scheme: patch
-          tag_prefix: v
-          release_body: ""
-          use_github_release_notes: true
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      - name: Upload PDF to release
-        if: github.event_name == 'push'
-        uses: svenstaro/upload-release-action@v2
-        with:
-          repo_token: ${{ secrets.GITHUB_TOKEN }}
-          file: paper/paper.pdf
-          asset_name: joss-draft.pdf
-          tag: ${{ steps.release.outputs.tag_name }}
-          overwrite: true
-          body: ""
+          path: paper/paper.pdf
\ No newline at end of file
diff --git a/.github/workflows/wordcount.yml b/.github/workflows/wordcount.yml
new file mode 100644
index 00000000..5c7be5d7
--- /dev/null
+++ b/.github/workflows/wordcount.yml
@@ -0,0 +1,43 @@
+name: Word Count
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+
+permissions:
+  contents: read
+  pull-requests: write
+  issues: write
+
+jobs:
+  count-words:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+
+      - name: Install Pandoc
+        run: sudo apt-get update && sudo apt-get install -y pandoc
+
+      - name: Count words in paper.md
+        id: wordcount
+        run: |
+          COUNT=$(pandoc paper/paper.md -t plain | wc -w)
+          echo "count=$COUNT" >> $GITHUB_OUTPUT
+          echo "📝 JOSS Word Count: $COUNT words"
+          if [ "$COUNT" -gt 1000 ]; then
+            echo "::warning title=JOSS Word Count::Paper exceeds 1000 words ($COUNT)."
+          fi
+
+      - name: Comment on PR with word count
+        if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
+        uses: marocchino/sticky-pull-request-comment@v2
+        with:
+          header: joss-wordcount
+          message: |
+            📝 **JOSS Word Count**
+
+            The current word count for `paper.md` is **${{ steps.wordcount.outputs.count }}** words.
+
+            _(JOSS recommends 250-1000 words for the main text.)_
\ No newline at end of file
diff --git a/paper/examples/Benchmark.tex b/paper/examples/Benchmark.tex
index 41685108..1c10c244 100644
--- a/paper/examples/Benchmark.tex
+++ b/paper/examples/Benchmark.tex
@@ -1,13 +1,9 @@
-\begin{tabular}{lcrrrrr}
+\begin{tabular}{llrrrrr}
 \hline
-  \textbf{Method} & \textbf{Status} & \textbf{$t$($s$)} & \textbf{$\#f$} & \textbf{$\#\nabla f$} & \textbf{$\#prox$} & \textbf{Objective} \\\hline
-  TR (LSR1, SVM) & first\_order & 3.9349 & 347 & 291 & 4037 & 179.837 \\
-  R2N (LSR1, SVM) & first\_order & 1.9511 & 185 & 101 & 27932 & 192.493 \\
-  LM (SVM) & first\_order & 19.7826 & 6 & 2876 & 1001 & 201.186 \\
-  LMTR (SVM) & first\_order & 12.4967 & 11 & 1614 & 432 & 188.274 \\
+  Method & Status & $t$ (s) & $\#f$ & $\#\nabla f$ & $\#prox$ & Objective \\\hline
+  TR & first\_order & 3.9349 & 347 & 291 & 4037 & 179.837 \\
+  R2N & first\_order & 1.9511 & 185 & 101 & 27932 & 192.493 \\
+  LM & first\_order & 19.7826 & 6 & 2876 & 1001 & 201.186 \\
+  LMTR & first\_order & 12.4967 & 11 & 1614 & 432 & 188.274 \\
 \hline
-  TR (LBFGS, NNMF) & first\_order & 0.1014 & 42 & 40 & 3160 & 976.06 \\
-  R2N (LBFGS, NNMF) & first\_order & 0.4913 & 169 & 107 & 17789 & 411.727 \\
-  LM (NNMF) & first\_order & 0.1157 & 14 & 7042 & 2601 & 131.184 \\
-  LMTR (NNMF) & first\_order & 0.0697 & 9 & 4066 & 1435 & 131.186 \\\hline
 \end{tabular}
diff --git a/paper/paper.bib b/paper/paper.bib
index 37da40fd..92939515 100644
--- a/paper/paper.bib
+++ b/paper/paper.bib
@@ -1,7 +1,7 @@
 @Article{ aravkin-baraldi-orban-2022,
   Author = {A. Y. Aravkin and R. Baraldi and D. Orban},
   Title = {A Proximal Quasi-{N}ewton Trust-Region Method for Nonsmooth Regularized Optimization},
-  Journal = siopt,
+  Journal = {SIAM J. Optim.},
   Year = 2022,
   Volume = 32,
   Number = 2,
@@ -12,7 +12,7 @@ @Article{ aravkin-baraldi-orban-2022
 @Article{ aravkin-baraldi-orban-2024,
   Author = {A. Y. Aravkin and R. Baraldi and D. Orban},
   Title = {A {L}evenberg–{M}arquardt Method for Nonsmooth Regularized Least Squares},
-  Journal = sisc,
+  Journal = {SIAM J. Sci. Comput.},
   Year = 2024,
   Volume = 46,
   Number = 4,
@@ -23,7 +23,7 @@ @Article{ aravkin-baraldi-orban-2024
 @Software{ leconte_linearoperators_jl_linear_operators_2023,
   Author = {Leconte, Geoffroy and Orban, Dominique and Soares Siqueira, Abel and contributors},
   license = {MPL-2.0},
-  Title = {{LinearOperators.jl: Linear Operators for Julia}},
+  Title = {{LinearOperators.jl}: Linear Operators for Julia},
   url = {https://github.com/JuliaSmoothOptimizers/LinearOperators.jl},
   version = {2.6.0},
   Year = 2023,
@@ -32,34 +32,33 @@ @Software{ leconte_linearoperators_jl_linear_operators_2023
 @Article{ leconte-orban-2023,
   Author = {G. Leconte and D. Orban},
   Title = {The Indefinite Proximal Gradient Method},
-  Journal = coap,
+  Journal = {Comput. Optim. Appl.},
   Year = 2025,
   Volume = 91,
   Number = 2,
-  Pages = 861--903,
+  Pages = {861--903},
   doi = {10.1007/s10589-024-00604-5},
 }
 
 @TechReport{ leconte-orban-2023-2,
   Author = {G. Leconte and D. Orban},
   Title = {Complexity of trust-region methods with unbounded {H}essian approximations for smooth and nonsmooth optimization},
-  Institution = gerad,
+  Institution = {GERAD},
   Year = 2023,
   Type = {Cahier},
   Number = {G-2023-65},
-  Address = gerad-address,
+  Address = {Montr\'eal, QC, Canada},
   url = {https://www.gerad.ca/fr/papers/G-2023-65},
 }
 
 @TechReport{ diouane-habiboullah-orban-2024,
   Author = {Youssef Diouane and Mohamed Laghdaf Habiboullah and Dominique Orban},
-  Title = {A proximal modified quasi-Newton method for nonsmooth regularized optimization},
+  Title = {A proximal modified quasi-{N}ewton method for nonsmooth regularized optimization},
   Institution = {GERAD},
   Year = 2024,
   Type = {Cahier},
   Number = {G-2024-64},
   Address = {Montr\'eal, Canada},
-  doi = {10.48550/arxiv.2409.19428},
   url = {https://www.gerad.ca/fr/papers/G-2024-64},
 }
 
@@ -74,47 +73,47 @@ @TechReport{ diouane-gollier-orban-2024
   doi = {10.13140/RG.2.2.16095.47527},
 }
 
-@Misc{orban-siqueira-cutest-2020,
-  author = {D. Orban and A. S. Siqueira and {contributors}},
-  title = {{CUTEst.jl}: {J}ulia's {CUTEst} interface},
-  month = {October},
-  url = {https://github.com/JuliaSmoothOptimizers/CUTEst.jl},
-  year = {2020},
-  DOI = {10.5281/zenodo.1188851},
+@Software{ orban-siqueira-cutest-2020,
+  author = {D. Orban and A. S. Siqueira and {contributors}},
+  title = {{CUTEst.jl}: {J}ulia's {CUTEst} interface},
+  month = {October},
+  url = {https://github.com/JuliaSmoothOptimizers/CUTEst.jl},
+  year = {2020},
+  DOI = {10.5281/zenodo.1188851},
 }
 
-@Misc{orban-siqueira-nlpmodels-2020,
-  author = {D. Orban and A. S. Siqueira and {contributors}},
-  title = {{NLPModels.jl}: Data Structures for Optimization Models},
-  month = {July},
-  url = {https://github.com/JuliaSmoothOptimizers/NLPModels.jl},
-  year = {2020},
-  DOI = {10.5281/zenodo.2558627},
+@Software{ orban-siqueira-nlpmodels-2020,
+  author = {D. Orban and A. S. Siqueira and {contributors}},
+  title = {{NLPModels.jl}: Data Structures for Optimization Models},
+  month = {July},
+  url = {https://github.com/JuliaSmoothOptimizers/NLPModels.jl},
+  year = {2020},
+  DOI = {10.5281/zenodo.2558627},
 }
 
-@Misc{jso,
-  author = {T. Migot and D. Orban and A. S. Siqueira},
-  title = {The {JuliaSmoothOptimizers} Ecosystem for Linear and Nonlinear Optimization},
-  year = {2021},
-  url = {https://juliasmoothoptimizers.github.io/},
-  doi = {10.5281/zenodo.2655082},
+@Software{ jso,
+  author = {T. Migot and D. Orban and A. S. Siqueira},
+  title = {The {JuliaSmoothOptimizers} Ecosystem for Linear and Nonlinear Optimization},
+  year = {2021},
+  url = {https://juliasmoothoptimizers.github.io/},
+  doi = {10.5281/zenodo.2655082},
 }
 
-@Misc{migot-orban-siqueira-optimizationproblems-2023,
+@Software{ migot-orban-siqueira-optimizationproblems-2023,
   author = {T. Migot and D. Orban and A. S. Siqueira},
-  title = {OptimizationProblems.jl: A collection of optimization problems in Julia},
+  title = {{OptimizationProblems.jl}: A collection of optimization problems in {J}ulia},
   year = {2023},
   doi = {10.5281/zenodo.3672094},
   url = {https://github.com/JuliaSmoothOptimizers/OptimizationProblems.jl},
 }
 
-@techreport{kim-park-2008,
-  title = {Sparse Nonnegative Matrix Factorization for Clustering},
-  author = {Jingu Kim and Haesun Park},
-  institution = {Georgia Inst. of Technology},
-  number = {GT-CSE-08-01},
-  year = {2008},
-  url = {http://hdl.handle.net/1853/20058},
+@techreport{ kim-park-2008,
+  title = {Sparse Nonnegative Matrix Factorization for Clustering},
+  author = {Jingu Kim and Haesun Park},
+  institution = {Georgia Inst. of Technology},
+  number = {GT-CSE-08-01},
+  year = {2008},
+  url = {http://hdl.handle.net/1853/20058},
 }
 
 @InProceedings{ stella-themelis-sopasakis-patrinos-2017,
@@ -126,10 +125,10 @@ @InProceedings{ stella-themelis-sopasakis-patrinos-2017
   doi = {10.1109/CDC.2017.8263933},
 }
 
-@article{demarchi-jia-kanzow-mehlitz-2023,
+@article{ demarchi-jia-kanzow-mehlitz-2023,
   author = {De~Marchi, Alberto and Jia, Xiaoxi and Kanzow, Christian and Mehlitz, Patrick},
   title = {Constrained composite optimization and augmented {L}agrangian methods},
-  journal = {Mathematical Programming},
+  journal = {Math. Program.},
   year = {2023},
   month = {9},
   volume = {201},
@@ -140,8 +139,8 @@ @article{demarchi-jia-kanzow-mehlitz-2023
 
 @Article{ themelis-stella-patrinos-2017,
   Author = {Themelis, Andreas and Stella, Lorenzo and Patrinos, Panagiotis},
-  Title = {Forward-Backward Envelope for the Sum of Two Nonconvex Functions: Further Properties and Nonmonotone line seach Algorithms},
-  Journal = siopt,
+  Title = {Forward-Backward Envelope for the Sum of Two Nonconvex Functions: Further Properties and Nonmonotone line search Algorithms},
+  Journal = {SIAM J. Optim.},
   Year = 2018,
   Volume = 28,
   Number = 3,
@@ -149,13 +148,13 @@ @Article{ themelis-stella-patrinos-2017
   doi = {10.1137/16M1080240},
 }
 
-@article{eckstein1992douglas,
-  title={On the Douglas–Rachford splitting method and the proximal point algorithm for maximal monotone operators},
-  author={Eckstein, Jonathan and Bertsekas, Dimitri P},
-  journal={Mathematical programming},
-  volume={55},
-  number={1},
-  pages={293--318},
-  year={1992},
-  publisher={Springer}
+@article{ eckstein-bertsekas-1992,
+  title = {On the {D}ouglas–{R}achford splitting method and the proximal point algorithm for maximal monotone operators},
+  author = {Eckstein, Jonathan and Bertsekas, Dimitri P},
+  journal = {Math. Program.},
+  volume = {55},
+  number = {1},
+  pages = {293--318},
+  year = {1992},
+  publisher = {Springer}
 }
diff --git a/paper/paper.md b/paper/paper.md
index 35f63522..f625dcea 100644
--- a/paper/paper.md
+++ b/paper/paper.md
@@ -37,24 +37,24 @@ header-includes: |
   \underset{x \in \mathbb{R}^n}{\text{minimize}} \quad f(x) + h(x) \quad \text{subject to} \quad c(x) = 0,
 \end{equation}
 where $f: \mathbb{R}^n \to \mathbb{R}$ and $c: \mathbb{R}^n \to \mathbb{R}^m$ are continuously differentiable, and $h: \mathbb{R}^n \to \mathbb{R} \cup \{+\infty\}$ is lower semi-continuous.
-The nonsmooth objective $h$ can be a *regularizer* such as a sparsity-inducing penalty, model simple constraints such as $x$ belonging to a simple convex set, or be a combination of both.
+The nonsmooth objective $h$ can be a *regularizer*, such as a sparsity-inducing penalty, a model of simple constraints, such as the requirement that $x$ belong to a simple convex set, or a combination of both.
 All $f$, $h$ and $c$ can be nonconvex.
-Together with the companion library [ShiftedProximalOperators.jl](https://github.com/JuliaSmoothOptimizers/ShiftedProximalOperators.jl) described below, RegularizedOptimization.jl provides a modular and extensible framework for solving \eqref{eq:nlp}, and developing novel solvers.
+RegularizedOptimization.jl provides a modular and extensible framework for solving \eqref{eq:nlp} and for developing novel solvers.
 Currently, the following solvers are implemented:
 
 - **Trust-region solvers TR and TRDH** [@aravkin-baraldi-orban-2022;@leconte-orban-2023]
 - **Quadratic regularization solvers R2, R2DH and R2N** [@diouane-habiboullah-orban-2024;@aravkin-baraldi-orban-2022]
-- **Levenberg-Marquardt solvers LM and LMTR** [@aravkin-baraldi-orban-2024] used when $f$ is a least-squares residual
+- **Levenberg-Marquardt solvers LM and LMTR** [@aravkin-baraldi-orban-2024], used when $f$ is a least-squares residual.
 - **Augmented Lagrangian solver AL** [@demarchi-jia-kanzow-mehlitz-2023].
 
 All solvers rely on first derivatives of $f$ and $c$, and optionally on their second derivatives in the form of Hessian-vector products.
-If second derivatives are not available or too costly to compute, quasi-Newton approximations can be used.
+If second derivatives are not available, quasi-Newton approximations can be used.
 In addition, the proximal mapping of the nonsmooth part $h$, or adequate models thereof, must be evaluated.
 At each iteration, a step is computed by solving a subproblem of the form \eqref{eq:nlp} inexactly, in which $f$, $h$, and $c$ are replaced with appropriate models about the current iterate.
 The solvers R2, R2DH and TRDH are particularly well suited to solve the subproblems, though they are general enough to solve \eqref{eq:nlp}.
-All solvers have a non-monotone mode that enhance performance in practice on certain problems [@leconte-orban-2023;@diouane-habiboullah-orban-2024].
-All are implemented in an in-place fashion, so that re-solves incur no allocations.
-To illustrate our claim of extensibility, a first version of the AL solver was implemented and submitted by an external contributor.
+All solvers are implemented in place, so re-solves incur no allocations.
+To illustrate our claim of extensibility, a first version of the AL solver was implemented by an external contributor.
+Furthermore, a nonsmooth penalty approach described in [@diouane-gollier-orban-2024], which relies on the library to solve the subproblems efficiently, is currently being developed.
 
 
 
@@ -68,43 +68,31 @@ To illustrate our claim of extensibility, a first version of the AL solver was i
 ## Model-based framework for nonsmooth methods
 
 In Julia, \eqref{eq:nlp} can be solved using [ProximalAlgorithms.jl](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl), which implements splitting schemes and line-search–based methods [@stella-themelis-sopasakis-patrinos-2017;@themelis-stella-patrinos-2017].
-Among others, the **PANOC** [@stella-themelis-sopasakis-patrinos-2017] solver takes a step along a direction $d$, which depends on the gradient of $f$ modified by a L-BFGS Quasi-Newton approximation, followed by proximal steps on the nonsmooth part $h$.
+Among others, the **PANOC** [@stella-themelis-sopasakis-patrinos-2017] solver takes a step along a direction $d$, computed from the gradient of $f$ and an L-BFGS quasi-Newton approximation, followed by proximal steps on $h$.
 By contrast, [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) focuses on model-based trust-region and quadratic regularization methods, which typically require fewer evaluations of $f$ and its gradient than first-order line search methods, at the expense of more evaluations of proximal operators [@aravkin-baraldi-orban-2022].
-However, each proximal computation is inexpensive for numerous commonly used choices of $h$, such as separable penalties and bound constraints (see examples below), so that the overall approach is efficient for large-scale problems.
+However, each proximal computation is inexpensive for numerous commonly used choices of $h$, such as separable penalties and bound constraints, so that the overall approach is efficient for large-scale problems.
 
-When computing a step by (approximately) minimizing a model, [ShiftedProximalOperators.jl](https://github.com/JuliaSmoothOptimizers/ShiftedProximalOperators.jl) implements efficient allocation-free shifted proximal mappings.
-Specifically, it supports shifted proximal operators of the form
-$$
-  \underset{t \in \mathbb{R}^n}{\arg\min} \, { \tfrac{1}{2} \|t - q\|_2^2 + \nu \psi(t + s; x) + \chi(s + t \mid \Delta \mathbb{B})}
-$$
-where $q$ is given, $x$ and $s$ are fixed shifts, $\chi(\cdot \mid \Delta \mathbb{B})$ is the indicator of a ball of radius $\Delta > 0$ defined by a certain norm, and $\psi(\cdot; x)$ is a model of $h$ about $x$.
-It is common to set $\psi(t + s; x) = h(x + s + t)$.
-
-These shifted operators allow to (i) incorporate bound or trust-region constraints via the indicator, which is required for the **TR** and **TRDH** solvers, and (ii) evaluate the above in place, without additional allocations, which is currently not possible with ProximalOperators.jl.
-
-RegularizedOptimization.jl provides a consistent API to formulate optimization problems and apply different solvers.
-It integrates seamlessly with the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) [@jso] ecosystem, an academic organization for nonlinear optimization software development, testing, and benchmarking.
+RegularizedOptimization.jl provides an API to formulate optimization problems and apply different solvers.
+It integrates seamlessly with the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) [@jso] ecosystem.
 The smooth objective $f$ can be defined via [NLPModels.jl](https://github.com/JuliaSmoothOptimizers/NLPModels.jl) [@orban-siqueira-nlpmodels-2020], which provides a standardized Julia API for representing nonlinear programming (NLP) problems.
-Large collections of such problems are available in [CUTEst.jl](https://github.com/JuliaSmoothOptimizers/CUTEst.jl) [@orban-siqueira-cutest-2020] and [OptimizationProblems.jl](https://github.com/JuliaSmoothOptimizers/OptimizationProblems.jl) [@migot-orban-siqueira-optimizationproblems-2023], but a use can easily interface or model their own smooth objective.
-
-The nonsmooth term $h$ can be modeled using [ProximalOperators.jl](https://github.com/JuliaSmoothOptimizers/ProximalOperators.jl), which provides a broad collection of regularizers and indicators of simple sets.
+The nonsmooth term $h$ can be modeled using [ProximalOperators.jl](https://github.com/JuliaSmoothOptimizers/ProximalOperators.jl).
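+For illustration only, a smooth $f$ and a nonsmooth $h$ could be set up as follows (a minimal sketch assuming [ADNLPModels.jl](https://github.com/JuliaSmoothOptimizers/ADNLPModels.jl) is installed; this toy instance is not one of the problems benchmarked below):
+```julia
+using ADNLPModels, ProximalOperators
+
+# Toy smooth term: a tiny least-squares objective defined via automatic differentiation
+f = ADNLPModel(x -> 0.5 * sum((x .- 1).^2), zeros(3))
+# Sparsity-inducing ℓ1 regularizer with weight 0.1
+h = NormL1(0.1)
+```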
 
-With $f$ and $h$ modeled as discussed above, the companion package [RegularizedProblems.jl](https://github.com/JuliaSmoothOptimizers/RegularizedProblems.jl) provides a straightforward way to pair them into a *Regularized Nonlinear Programming Model*
+Given $f$ and $h$, the companion package [RegularizedProblems.jl](https://github.com/JuliaSmoothOptimizers/RegularizedProblems.jl) provides a way to pair them into a *Regularized Nonlinear Programming Model*
 ```julia
 reg_nlp = RegularizedNLPModel(f, h)
 ```
-They can also be paired into a *Regularized Nonlinear Least Squares Model* if $f(x) = \tfrac{1}{2} \|F(x)\|^2$ for some residual $F: \mathbb{R}^n \to \mathbb{R}^m$, as would be the case with the **LM** and **LMTR** solvers.
+They can also be paired into a *Regularized Nonlinear Least-Squares Model* if $f(x) = \tfrac{1}{2} \|F(x)\|^2$ for some residual $F: \mathbb{R}^n \to \mathbb{R}^m$, as is the case for the **LM** and **LMTR** solvers.
 ```julia
-reg_nls = RegularizedNLSModel(f, h)
+reg_nls = RegularizedNLSModel(F, h)
 ```
 
-RegularizedProblems.jl also provides a set of instances commonly used in data science and in the nonsmooth optimization literature, where several choices of $f$ can be paired with various nonsmooth terms $h$.
-This design makes for a convenient source of reproducible problem instances for testing and benchmarking the solvers in [RegularizedOptimization.jl](https://www.github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl).
+RegularizedProblems.jl also provides a set of instances commonly used in data science and in nonsmooth optimization, where several choices of $f$ can be paired with various regularizers.
+This design makes for a convenient source of problem instances for benchmarking the solvers in [RegularizedOptimization.jl](https://www.github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl).
 
 ## Support for both exact and approximate Hessian
 
@@ -113,27 +101,11 @@ Hessian–vector products $v \mapsto Hv$ can be obtained via automatic different
 Limited-memory and diagonal quasi-Newton approximations can be selected from [LinearOperators.jl](https://github.com/JuliaSmoothOptimizers/LinearOperators.jl).
 This design allows solvers to exploit second-order information without explicitly forming dense or sparse Hessians, which is often expensive in time and memory, particularly at large scale.
 
-## Testing and documentation
-
-The package includes a comprehensive suite of unit tests that cover all functionalities, ensuring reliability and correctness.
-Extensive documentation is provided, including a user guide, API reference, and examples to help users get started quickly.
-Documentation is built using Documenter.jl.
-
-## Application
+# Example
 
-A novel implementation of the exact penalty approach [@diouane-gollier-orban-2024] for equality-constrained smooth optimization is being developed based on RegularizedOptimization.jl.
-In it, $h(x) = \|c(x)\|$ and the model $\psi(\cdot; x)$ differs from $h$ itself.
-Specifically, $\psi(\cdot; x)$ is the norm of a linearization of $c$ about $x$.
-This is not covered in the current version of [ProximalAlgorithms.jl](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl).
+We illustrate the capabilities of [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) on a Support Vector Machine (SVM) model with an $\ell_{1/2}^{1/2}$ penalty for image classification [@aravkin-baraldi-orban-2024].
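+Here, assuming the standard definition of this penalty, the nonsmooth term is $h(x) = \lambda \|x\|_{1/2}^{1/2} = \lambda \sum_{i=1}^{n} \sqrt{|x_i|}$ with $\lambda > 0$; it is instantiated as `RootNormLhalf` in the listing below.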
 
-# Examples
-
-We illustrate the capabilities of [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) on two nonsmooth and nonconvex problems:
-
-- **Support Vector Machine (SVM) with $\ell_{1/2}^{1/2}$ penalty** for image classification [@aravkin-baraldi-orban-2024].
-- **Nonnegative Matrix Factorization (NNMF) with $\ell_0$ penalty and bound constraints** [@kim-park-2008].
-
-Below is a condensed example showing how to define and solve SVM problem, and perform a solve followed by a re-solve:
+Below is a condensed example showing how to define and solve the problem, and perform a solve followed by a re-solve:
 
 ```julia
 using LinearAlgebra, Random, ProximalOperators
@@ -144,7 +116,7 @@ Random.seed!(1234)
 model, nls_model, _ = RegularizedProblems.svm_train_model() # Build SVM model
 f = LSR1Model(model) # L-SR1 Hessian approximation
 λ = 1.0 # Regularization parameter
-h = RootNormLhalf(λ) # Nonsmooth term
+h = RootNormLhalf(λ)  # Nonsmooth term
 reg_nlp = RegularizedNLPModel(f, h) # Regularized problem
 solver = R2NSolver(reg_nlp) # Choose solver
 stats = RegularizedExecutionStats(reg_nlp)
@@ -152,59 +124,31 @@ solve!(solver, reg_nlp, stats; atol=1e-4, rtol=1e-4, verbose=1, sub_kwargs=(max_
 solve!(solver, reg_nlp, stats; atol=1e-5, rtol=1e-5, verbose=1, sub_kwargs=(max_iter=200,))
 ```
 
-The NNMF problem can be set up in a similar fashion:
-
-```julia
-Random.seed!(1234)
-m, n, k = 100, 50, 5
-model, nls_model, _, selected = nnmf_model(m, n, k) # Build NNMF model
-x0 = rand(model.meta.nvar) # Initial point
-λ = norm(grad(model, rand(model.meta.nvar)), Inf) / 200 # Regularization parameter
-h = NormL0(λ) # Nonsmooth term
-reg_nls = RegularizedNLSModel(nls_model, h) # Regularized problem for LM
-solver = LMSolver(reg_nls) # Choose solver
-```
-
 ## Numerical results
 
-We compare **TR**, **R2N**, **LM** and **LMTR** from our library.
+We compare **TR**, **R2N**, **LM** and **LMTR** from our library on the SVM problem.
 
-We report the following solver statistics in the table: the convergence status of each solver, the number of evaluations of $f$, the number of evaluations of $\nabla f$, the number of proximal operator evaluations, the elapsed time in seconds and the final objective value.
-On the SVM and NNMF problems, we use limited-memory SR1 and BFGS Hessian approximations, respectively.
+The table reports the convergence status of each solver, the number of evaluations of $f$, the number of evaluations of $\nabla f$, the number of proximal operator evaluations, the elapsed time in seconds, and the final objective value.
+For TR and R2N, we use limited-memory SR1 Hessian approximations.
 The subproblem solver is **R2**.
 
 \input{examples/Benchmark.tex}
 
-- Note that for the **LM** and **LMTR** solvers, gradient evaluations count $\#\nabla f$ equals the number of Jacobian–vector and adjoint-Jacobian–vector products.
+For the **LM** and **LMTR** solvers, $\#\nabla f$ counts the number of Jacobian–vector and adjoint-Jacobian–vector products.
 All methods successfully reduced the optimality measure below the specified tolerance of $10^{-4}$, and thus converged to an approximate first-order stationary point.
-Note that, the final objective values differ due to the nonconvexity of the problems.
+Note that the final objective values differ due to the nonconvexity of the problem.
 
-- **SVM with $\ell^{1/2}$ penalty:** **R2N** is the fastest, requiring the fewest gradient evaluations compared to all the other solvers.
+**R2N** is the fastest in terms of time and number of gradient evaluations.
 However, it requires more proximal evaluations, but these are inexpensive.
-**LMTR** and **LM** require the fewest function evaluations, but incur many Jacobian–vector products, and are the slowest.
-Note that here, **LMTR** achieves the lowest objective value.
-- **NNMF with constrained $\ell_0$ penalty:** **LMTR** is the fastest, and requires a fewer number of function evaluations than all the other solvers. Followed by **TR** which is the second fastest and requires the fewest gradient evaluations, however it achieves the highest objective value.
-Note that both **LMTR** and **LM** achieve the lowest objective value.
-
-Additional tests (e.g., other regularizers, constraint types, and scaling dimensions) have also been conducted, and a full benchmarking campaign is currently underway.
-
-# Conclusion
-
-The experiments highlight the effectiveness of the solvers implemented in [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl).
-
-
-
-
-
-In ongoing research, the package will be extended with algorithms that enable to reduce the number of proximal evaluations, especially when the proximal mapping of $h$ is expensive to compute.
+**LMTR** and **LM** require the fewest function evaluations, but incur many Jacobian–vector products, and are the slowest in terms of time.
 
+Ongoing research aims to reduce the number of proximal evaluations.
 
 # Acknowledgements
 
-The authors would like to thank Alberto Demarchi for his implementation of the Augmented Lagrangian solver.
-Mohamed Laghdaf Habiboullah is supported by an excellence FRQNT grant.
-Youssef Diouane, Maxence Gollier and Dominique Orban are partially supported by an NSERC Discovery Grant.
+The authors would like to thank A. De Marchi for the Augmented Lagrangian solver.
+M. L. Habiboullah is supported by an excellence FRQNT grant.
+Y. Diouane, M. Gollier and D. Orban are partially supported by an NSERC Discovery Grant.
 
 # References