
Commit f244314

Merge branch 'dev'
2 parents: 76f6e48 + 51d5c34

File tree: 4 files changed (+67 / -15 lines)


README.md

Lines changed: 10 additions & 4 deletions
````diff
@@ -6,11 +6,17 @@
 
 This is a package gathering functionalities to solve a number of generalised linear regression/classification problems which, inherently, correspond to an optimisation problem of the form
 
-```
-L(y, Xθ) + P(θ)
-```
+$$
+L(y, X\theta) + P(\theta)
+$$
+
+where:
+
+- $L$ is a loss function
+- $X$ is the $n \times p$ matrix of training observations, where $n$ is the number of _observations_ (sample size) and $p$ is the number of _features_ (dimension)
+- $\theta$ the length $p$ vector of weights to be optimized
+- $P$ is a penalty function
 
-where `L` is a loss function and `P` is a penalty function (both of those can be scaled or composed).
 Additional regression/classification methods which do not directly correspond to this formulation may be added in the future.
 
 The core aims of this package are:
````
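To make the new formulation concrete: ridge regression is the special case where $L$ is the squared loss and $P$ a scaled squared $\ell_2$ penalty. A minimal Julia sketch (illustrative names only, not part of the package or of this diff):

```julia
# Objective L(y, Xθ) + P(θ) specialised to ridge regression:
# L(y, Xθ) = ‖y - Xθ‖²  and  P(θ) = λ‖θ‖².
ridge_objective(θ, X, y; λ = 1.0) = sum(abs2, y .- X * θ) + λ * sum(abs2, θ)

n, p = 100, 5                       # n observations, p features
X = randn(n, p)
y = X * ones(p) .+ 0.1 .* randn(n)
ridge_objective(zeros(p), X, y)     # value of the objective at θ = 0
```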

docs/src/index.md

Lines changed: 1 addition & 1 deletion
````diff
@@ -11,7 +11,7 @@ where:
 * ``y`` is the **target** or **response**, a vector of length ``n`` either of real values (_regression_) or integers (_classification_),
 * ``X`` is the **design** or **feature** matrix, a matrix of real values of size ``n \times p`` where ``p`` is the number of _features_ or _dimensions_,\
 * ``\theta`` is a vector of ``p`` real valued coefficients to determine,
-* ``L`` is a **loss function**, a pre-determined function of ``\mathbb R^n`` to ``\mathbb R^+`` penalising the amplitude of the _residuals_ in a specific way,
+* ``L`` is a **loss function**, a pre-determined function of ``\mathbb R^n \times \mathbb R^n`` to ``\mathbb R^+`` penalising the amplitude of the _residuals_ in a specific way,
 * ``P`` is a **penalty function**, a pre-determined function of ``\mathbb R^n`` to ``\mathbb R^+`` penalising the amplitude of the _coefficients_ in a specific way.
 
 A well known example is the [Ridge regression](https://en.wikipedia.org/wiki/Tikhonov_regularization) where the objective is to minimise:
````
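The one-line change above corrects the stated domain of ``L``: a loss compares the prediction vector ``X\theta`` with the target ``y``, so it is a map from ``\mathbb R^n \times \mathbb R^n`` to ``\mathbb R^+``. A small Julia sketch with ad hoc names (not the package's loss types):

```julia
# A loss L : ℝⁿ × ℝⁿ → ℝ⁺, here the squared loss, evaluated on (y, ŷ) with ŷ = Xθ.
squared_loss(y, ŷ) = sum(abs2, y .- ŷ)

X = [1.0 0.0; 0.0 1.0; 1.0 1.0]
θ = [0.5, -0.5]
y = [0.4, -0.6, 0.1]
squared_loss(y, X * θ)   # non-negative, and zero only if Xθ == y
```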

src/fit/newton.jl

Lines changed: 10 additions & 5 deletions
````diff
@@ -18,7 +18,8 @@ function _fit(glr::GLR{<:Union{LogisticLoss,RobustLoss},<:L2R},
     θ₀ = zeros(p)
     _fgh! = fgh!(glr, X, y, scratch)
     opt = Optim.only_fgh!(_fgh!)
-    res = Optim.optimize(opt, θ₀, Optim.Newton())
+    res = Optim.optimize(opt, θ₀, Optim.Newton(; solver.newton_options...),
+                         solver.optim_options)
     return Optim.minimizer(res)
 end
 
@@ -42,7 +43,8 @@ function _fit(glr::GLR{<:Union{LogisticLoss,RobustLoss},<:L2R},
     _fg! = (g, θ) -> fgh!(glr, X, y, scratch)(0.0, g, nothing, θ) # Optim#738
     _Hv! = Hv!(glr, X, y, scratch)
     opt = Optim.TwiceDifferentiableHV(_f, _fg!, _Hv!, θ₀)
-    res = Optim.optimize(opt, θ₀, Optim.KrylovTrustRegion())
+    res = Optim.optimize(opt, θ₀, Optim.KrylovTrustRegion(; solver.newtoncg_options...),
+                         solver.optim_options)
     return Optim.minimizer(res)
 end
 
@@ -62,7 +64,8 @@ function _fit(glr::GLR{<:Union{LogisticLoss,RobustLoss},<:L2R},
     θ₀ = zeros(p)
     _fg! = (f, g, θ) -> fgh!(glr, X, y, scratch)(f, g, nothing, θ)
     opt = Optim.only_fg!(_fg!)
-    res = Optim.optimize(opt, θ₀, Optim.LBFGS())
+    res = Optim.optimize(opt, θ₀, Optim.LBFGS(; solver.lbfgs_options...),
+                         solver.optim_options)
     return Optim.minimizer(res)
 end
 
@@ -90,7 +93,8 @@ function _fit(glr::GLR{<:MultinomialLoss,<:L2R}, solver::NewtonCG,
     _fg! = (g, θ) -> fg!(glr, X, y, scratch)(0.0, g, θ) # XXX: Optim.jl/738
     _Hv! = Hv!(glr, X, y, scratch)
     opt = Optim.TwiceDifferentiableHV(_f, _fg!, _Hv!, θ₀)
-    res = Optim.optimize(opt, θ₀, Optim.KrylovTrustRegion())
+    res = Optim.optimize(opt, θ₀, Optim.KrylovTrustRegion(; solver.newtoncg_options...),
+                         solver.optim_options)
     return Optim.minimizer(res)
 end
 
@@ -111,6 +115,7 @@ function _fit(glr::GLR{<:MultinomialLoss,<:L2R}, solver::LBFGS,
     θ₀ = zeros(p * c)
     _fg! = fg!(glr, X, y, scratch)
     opt = Optim.only_fg!(_fg!)
-    res = Optim.optimize(opt, θ₀, Optim.LBFGS())
+    res = Optim.optimize(opt, θ₀, Optim.LBFGS(; solver.lbfgs_options...),
+                         solver.optim_options)
     return Optim.minimizer(res)
 end
````
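Each hunk follows the same pattern: the solver's method-specific options (a NamedTuple) are splatted into the Optim method constructor, while the general `Optim.Options` are passed as an extra argument to `Optim.optimize`. A standalone sketch of that call pattern with a toy objective (the package's internal `fgh!`/`fg!` closures are not reproduced here):

```julia
using Optim

# Toy objective and gradient standing in for the package's closures.
f(θ)     = sum(abs2, θ .- 1)
g!(G, θ) = (G .= 2 .* (θ .- 1))

lbfgs_options = (m = 20,)                        # method-specific keywords
optim_options = Optim.Options(time_limit = 20)   # general Optim options

res = Optim.optimize(f, g!, zeros(3),
                     Optim.LBFGS(; lbfgs_options...),
                     optim_options)
Optim.minimizer(res)   # ≈ [1.0, 1.0, 1.0]
```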

src/fit/solvers.jl

Lines changed: 46 additions & 5 deletions
````diff
@@ -41,8 +41,22 @@ $SIGNATURES
 
 Newton solver. This is a full Hessian solver and should be avoided for
 "large scale" cases.
+
+`optim_options` are the [general Optim Options](https://julianlsolvers.github.io/Optim.jl/stable/#user/config/).
+`newton_options` are the [options of Newton's method](https://julianlsolvers.github.io/Optim.jl/stable/#algo/newton/)
+
+## Example
+```julia
+using MLJLinearModels, Optim
+
+solver = MLJLinearModels.Newton(optim_options = Optim.Options(time_limit = 20),
+                                newton_options = (linesearch = Optim.LineSearches.HagerZhang(),))
+```
 """
-struct Newton <: Solver end
+@with_kw struct Newton{O,S} <: Solver
+    optim_options::O = Optim.Options()
+    newton_options::S = (; )
+end
 
 """
 $SIGNATURES
@@ -51,17 +65,44 @@ Newton CG solver. This is the same as the Newton solver except that instead
 of solving systems of the form `H\\b` where `H` is the full Hessian, it uses
 a matrix-free conjugate gradient approach to solving that system. This should
 generally be preferred for larger scale cases.
+
+`optim_options` are the [general Optim Options](https://julianlsolvers.github.io/Optim.jl/stable/#user/config/).
+`newtoncg_options` are the [options of Krylov Trust Region method](https://github.com/JuliaNLSolvers/Optim.jl/blob/master/src/multivariate/solvers/second_order/krylov_trust_region.jl)
+
+## Example
+```julia
+using MLJLinearModels, Optim
+
+solver = MLJLinearModels.NewtonCG(optim_options = Optim.Options(time_limit = 20),
+                                  newtoncg_options = (eta = 0.2,))
+```
+
 """
-struct NewtonCG <: Solver end
+@with_kw struct NewtonCG{O,S} <: Solver
+    optim_options::O = Optim.Options()
+    newtoncg_options::S = (; )
+end
 
 """
 $SIGNATURES
 
 LBFGS quasi-Newton solver. See [the wikipedia entry](https://en.wikipedia.org/wiki/Limited-memory_BFGS).
-"""
-struct LBFGS <: Solver end
 
-# struct BFGS <: Solver end
+`optim_options` are the [general Optim Options](https://julianlsolvers.github.io/Optim.jl/stable/#user/config/).
+`lbfgs_options` are the [options of LBFGS method](https://julianlsolvers.github.io/Optim.jl/stable/#algo/lbfgs/)
+
+## Example
+```julia
+using MLJLinearModels, Optim
+
+solver = MLJLinearModels.LBFGS(optim_options = Optim.Options(time_limit = 20),
+                               lbfgs_options = (linesearch = Optim.LineSearches.HagerZhang(),))
+```
+"""
+@with_kw struct LBFGS{O,S} <: Solver
+    optim_options::O = Optim.Options()
+    lbfgs_options::S = (; )
+end
 
 # ===================== pgrad.jl
 
````
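Downstream, a configured solver is passed to the fitting routine. A hedged sketch, assuming the `solver` keyword of `MLJLinearModels.fit` (the data and option values below are purely illustrative):

```julia
using MLJLinearModels, Optim

# Illustrative two-class data with ±1 labels.
X = randn(100, 3)
y = ifelse.(X * [1.0, -1.0, 0.5] .> 0, 1.0, -1.0)

# One of the keyword-based solvers introduced above ...
solver = MLJLinearModels.LBFGS(optim_options = Optim.Options(g_tol = 1e-6),
                               lbfgs_options = (m = 10,))

# ... handed to the fit via the `solver` keyword.
θ = MLJLinearModels.fit(LogisticRegression(), X, y; solver = solver)
```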
