
Commit f4a38dd (2 parents: abf145f + 0e19298)

Merge pull request #113 from JuliaAI/dev
For a 0.6.1 release

24 files changed: +417 −198 lines

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion

@@ -17,7 +17,7 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.0'
+          - '1.5'
           - '1'
         os:
           - ubuntu-latest

Project.toml

Lines changed: 2 additions & 2 deletions

@@ -1,7 +1,7 @@
 name = "MLJLinearModels"
 uuid = "6ee0df7b-362f-4a72-a706-9e79364fb692"
 authors = ["Thibaut Lienart <[email protected]>"]
-version = "0.5.6"
+version = "0.6.1"

 [deps]
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
@@ -19,7 +19,7 @@ LinearMaps = "2.6, 3.2"
 MLJModelInterface = "0.3, 0.4, 1.0"
 Optim = "0.20, 0.21, 1"
 Parameters = "0.12"
-julia = "^1"
+julia = "1.5"

 [extras]
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"

src/fit/admm.jl

Lines changed: 2 additions & 2 deletions

@@ -9,7 +9,7 @@

 # function _fit(glr::GLR{L1Loss,<:L2R}, solver::ADMM, X, y)
 #     n, p = size(X)
-#     λ = getscale(glr.penalty)
+#     λ = get_penalty_scale(glr, n)
 #     φ = 1.0 / solver.rho
 #     λφ = λ * φ
 #     # pre-computations
@@ -63,7 +63,7 @@
 #
 # function _fit(glr::GLR{L1Loss,<:L2R}, solver::FADMM, X, y)
 #     n, p = size(X)
-#     λ = getscale(glr.penalty)
+#     λ = get_penalty_scale(glr, n)
 #     ρ = solver.rho
 #     η = solver.eta # linked to restart frequency
 #     τ = solver.tau # linked to updating ρ
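
Every solver file in this commit swaps `getscale(glr.penalty)` for `get_penalty_scale(glr, n)`, folding the new `scale_penalty_with_samples` flag (added in src/glr/constructors.jl below) into the penalty strength. The helper's definition is not part of this diff; a minimal sketch of what it plausibly computes, assuming `getscale` returns the raw penalty scale λ:

    # Hypothetical sketch: multiply the raw penalty scale by the sample
    # count n when the model was built with scale_penalty_with_samples=true.
    get_penalty_scale(glr, n) =
        getscale(glr.penalty) * ifelse(glr.scale_penalty_with_samples, n, 1)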

src/fit/analytical.jl

Lines changed: 2 additions & 2 deletions

@@ -18,13 +18,13 @@ Assuming `n` dominates `p`,
 function _fit(glr::GLR{L2Loss,<:L2R}, solver::Analytical, X, y, scratch)
     # full solve
     if !solver.iterative
-        λ = getscale(glr.penalty)
+        λ = get_penalty_scale(glr, length(y))
         if iszero(λ)
             # standard LS solution
             return augment_X(X, glr.fit_intercept) \ y
         else
             # Ridge case -- form the Hat Matrix then solve
-            H = form_XtX(X, glr.fit_intercept, λ)
+            H = form_XtX(X, glr.fit_intercept, λ, glr.penalize_intercept)
             b = X'y
             glr.fit_intercept && (b = vcat(b, sum(y)))
             return cholesky!(H) \ b
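
The ridge branch above forms the normal-equations system and solves it with a Cholesky factorization. A self-contained sketch of the same closed-form computation, with the simplifying assumption that the appended intercept column is penalized like the other coefficients (the package handles that separately via `penalize_intercept`; `ridge_solve` is an illustrative name, not the package API):

    using LinearAlgebra

    # Illustrative closed-form ridge solve: min |Xθ - y|₂²/2 + λ|θ|₂²/2.
    function ridge_solve(X::AbstractMatrix, y::AbstractVector, λ::Real)
        Xa = hcat(X, ones(size(X, 1)))   # augment with an intercept column
        H  = Xa' * Xa + λ * I            # system matrix X'X + λI
        return cholesky!(Symmetric(H)) \ (Xa' * y)
    end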

src/fit/iwls.jl

Lines changed: 0 additions & 1 deletion

@@ -1,6 +1,5 @@
 function _fit(glr::GLR{RobustLoss{ρ},<:L2R}, solver::IWLSCG, X, y, scratch
              ) where {ρ}
-    λ = getscale(glr.penalty)
     n,p,_ = npc(scratch)
     _Mv! = Mv!(glr, X, y, scratch; threshold=solver.threshold)
     κ = solver.damping # between 0 and 1, 1 = fully take the new iteration

src/fit/proxgrad.jl

Lines changed: 2 additions & 2 deletions

@@ -21,7 +21,7 @@ function _fit(glr::GLR, solver::ProxGrad, X, y, scratch)
     # functions
     _f = smooth_objective(glr, X, y; c=c)
     _fg! = smooth_fg!(glr, X, y, scratch)
-    _prox! = prox!(glr)
+    _prox! = prox!(glr, size(X, 1))
     bt_cond = θ̂ ->
         _f(θ̂) > fθ̄ + dot(θ̂ .- θ̄, ∇fθ̄) + sum(abs2.(θ̂ .- θ̄)) / (2η)
     # loop-related
@@ -48,7 +48,7 @@ function _fit(glr::GLR, solver::ProxGrad, X, y, scratch)
         end
         if inner == solver.max_inner
             @warn "No appropriate stepsize found via backtracking; " *
-                  "interrupting."
+                  "interrupting. The reason could be input data that is not standardized."
             break
         end
         # update caches
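
`prox!` now receives the sample count `size(X, 1)` so the proximal step applies the sample-scaled penalty n⋅λ. For an L1 penalty the proximal operator is soft-thresholding; a minimal sketch under that assumption (the package's actual `prox!` returns an in-place operator, so the names below are illustrative):

    # Illustrative prox step for P(θ) = n*λ*|θ|₁ with stepsize η:
    # elementwise soft-thresholding at level η*n*λ.
    soft_threshold(z, t) = sign(z) * max(abs(z) - t, zero(z))

    prox_l1!(θ, η, z, λ, n) = (θ .= soft_threshold.(z, η * n * λ))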

src/glr/constructors.jl

Lines changed: 71 additions & 25 deletions

@@ -14,6 +14,8 @@ Generalized Linear Regression (GLR) model with objective function:

 where `L` is a loss function, `P` a penalty, `y` is the vector of observed
 response, `X` is the feature matrix and `θ` the vector of parameters.
+If `scale_penalty_with_samples = true` (default) the penalty is automatically
+scaled with the number of samples.

 Special cases include:
@@ -28,6 +30,7 @@ Special cases include:
     penalty::P = NoPenalty()          # P(θ)
     fit_intercept::Bool = true        # add intercept ? def=true
     penalize_intercept::Bool = false
+    scale_penalty_with_samples::Bool = true
 end

 const GLR = GeneralizedLinearRegression
@@ -48,44 +51,59 @@ LinearRegression(; fit_intercept::Bool=true) = GLR(fit_intercept=fit_intercept)
 """
 $SIGNATURES

-Objective function: ``|Xθ - y|₂²/2 + λ|θ|₂²/2``.
+Objective function: ``|Xθ - y|₂²/2 + n⋅λ|θ|₂²/2``,
+where ``n`` is the number of samples `size(X, 1)`.
+With `scale_penalty_with_samples = false` the objective function is
+``|Xθ - y|₂²/2 + λ|θ|₂²/2``.
 """
 function RidgeRegression(λ::Real=1.0; lambda::Real=λ, fit_intercept::Bool=true,
-                         penalize_intercept::Bool=false)
+                         penalize_intercept::Bool=false,
+                         scale_penalty_with_samples::Bool=true)
     check_pos(lambda)
     GLR(penalty=lambda*L2Penalty(),
         fit_intercept=fit_intercept,
-        penalize_intercept=penalize_intercept)
+        penalize_intercept=penalize_intercept,
+        scale_penalty_with_samples=scale_penalty_with_samples)
 end


 """
 $SIGNATURES

-Objective function: ``|Xθ - y|₂²/2 + λ|θ|₁``.
+Objective function: ``|Xθ - y|₂²/2 + n⋅λ|θ|₁``,
+where ``n`` is the number of samples `size(X, 1)`.
+With `scale_penalty_with_samples = false` the objective function is
+``|Xθ - y|₂²/2 + λ|θ|₁``
 """
 function LassoRegression(λ::Real=1.0; lambda::Real=λ, fit_intercept::Bool=true,
-                         penalize_intercept::Bool=false)
+                         penalize_intercept::Bool=false,
+                         scale_penalty_with_samples::Bool=true)
     check_pos(lambda)
     GLR(penalty=lambda*L1Penalty(),
         fit_intercept=fit_intercept,
-        penalize_intercept=penalize_intercept)
+        penalize_intercept=penalize_intercept,
+        scale_penalty_with_samples=scale_penalty_with_samples)
 end


 """
 $SIGNATURES

-Objective function: ``|Xθ - y|₂²/2 + λ|θ|₂²/2 + γ|θ|₁``.
+Objective function: ``|Xθ - y|₂²/2 + n⋅λ|θ|₂²/2 + n⋅γ|θ|₁``,
+where ``n`` is the number of samples `size(X, 1)`.
+With `scale_penalty_with_samples = false` the objective function is
+``|Xθ - y|₂²/2 + λ|θ|₂²/2 + γ|θ|₁``
 """
 function ElasticNetRegression(λ::Real=1.0, γ::Real=1.0;
                               lambda::Real=λ, gamma::Real=γ,
                               fit_intercept::Bool=true,
-                              penalize_intercept::Bool=false)
+                              penalize_intercept::Bool=false,
+                              scale_penalty_with_samples::Bool=true)
     check_pos.((lambda, gamma))
     GLR(penalty=lambda*L2Penalty()+gamma*L1Penalty(),
         fit_intercept=fit_intercept,
-        penalize_intercept=penalize_intercept)
+        penalize_intercept=penalize_intercept,
+        scale_penalty_with_samples=scale_penalty_with_samples)
 end


@@ -114,14 +132,18 @@ end
 """
 $SIGNATURES

-Objective function: ``L(y, Xθ) + λ|θ|₂²/2 + γ|θ|₁`` where `L` is either the
-logistic loss in the binary case or the multinomial loss otherwise.
+Objective function: ``L(y, Xθ) + n⋅λ|θ|₂²/2 + n⋅γ|θ|₁`` where `L` is either the
+logistic loss in the binary case or the multinomial loss otherwise and
+``n`` is the number of samples `size(X, 1)`.
+With `scale_penalty_with_samples = false` the objective function is
+``L(y, Xθ) + λ|θ|₂²/2 + γ|θ|₁``.
 """
 function LogisticRegression(λ::Real=1.0, γ::Real=0.0;
                             lambda::Real=λ, gamma::Real=γ,
                             penalty::Symbol=iszero(gamma) ? :l2 : :en,
                             fit_intercept::Bool=true,
                             penalize_intercept::Bool=false,
+                            scale_penalty_with_samples::Bool=true,
                             multi_class::Bool=false,
                             nclasses::Integer=0)
     penalty = _l1l2en(lambda, gamma, penalty, "Logistic regression")
@@ -134,14 +156,18 @@ function LogisticRegression(λ::Real=1.0, γ::Real=0.0;
     GLR(loss=loss,
         penalty=penalty,
         fit_intercept=fit_intercept,
-        penalize_intercept=penalize_intercept)
+        penalize_intercept=penalize_intercept,
+        scale_penalty_with_samples=scale_penalty_with_samples)
 end

 """
 $SIGNATURES

-Objective function: ``L(y, Xθ) + λ|θ|₂²/2 + γ|θ|₁`` where `L` is the
-multinomial loss.
+Objective function: ``L(y, Xθ) + n⋅λ|θ|₂²/2 + n⋅γ|θ|₁`` where `L` is the
+multinomial loss and
+``n`` is the number of samples `size(X, 1)`.
+With `scale_penalty_with_samples = false` the objective function is
+``L(y, Xθ) + λ|θ|₂²/2 + γ|θ|₁``.
 """
 MultinomialRegression(a...; kwa...) =
     LogisticRegression(a...; multi_class=true, kwa...)
@@ -152,74 +178,94 @@ MultinomialRegression(a...; kwa...) =
 """
 $SIGNATURES

-Objective function: ``∑ρ(Xθ - y) + λ|θ|₂² + γ|θ|₁`` where ρ is a given function
-on the residuals.
+Objective function: ``∑ρ(Xθ - y) + n⋅λ|θ|₂² + n⋅γ|θ|₁`` where ρ is a given function
+on the residuals and
+``n`` is the number of samples `size(X, 1)`.
+With `scale_penalty_with_samples = false` the objective function is
+``∑ρ(Xθ - y) + λ|θ|₂² + γ|θ|₁``.
 """
 function RobustRegression(ρ::RobustRho=HuberRho(0.1), λ::Real=1.0, γ::Real=0.0;
                           rho::RobustRho=ρ, lambda::Real=λ, gamma::Real=γ,
                           penalty::Symbol=iszero(gamma) ? :l2 : :en,
                           fit_intercept::Bool=true,
+                          scale_penalty_with_samples::Bool=true,
                           penalize_intercept::Bool=false)
     penalty = _l1l2en(lambda, gamma, penalty, "Robust regression")
     GLR(loss=RobustLoss(rho),
         penalty=penalty,
         fit_intercept=fit_intercept,
-        penalize_intercept=penalize_intercept)
+        penalize_intercept=penalize_intercept,
+        scale_penalty_with_samples=scale_penalty_with_samples)
 end

 """
 $SIGNATURES

 Huber Regression with objective:

-``∑ρ(Xθ - y) + λ|θ|₂²/2 + γ|θ|₁``
+``∑ρ(Xθ - y) + n⋅λ|θ|₂²/2 + n⋅γ|θ|₁``

 Where `ρ` is the Huber function `ρ(r) = r²/2` if `|r|≤δ` and
-`ρ(r)=δ(|r|-δ/2)` otherwise.
+`ρ(r)=δ(|r|-δ/2)` otherwise and
+``n`` is the number of samples `size(X, 1)`.
+With `scale_penalty_with_samples = false` the objective function is
+``∑ρ(Xθ - y) + λ|θ|₂²/2 + γ|θ|₁``.
 """
 function HuberRegression(δ::Real=0.5, λ::Real=1.0, γ::Real=0.0;
                          delta::Real=δ, lambda::Real=λ, gamma::Real=γ,
                          penalty::Symbol=iszero(gamma) ? :l2 : :en,
                          fit_intercept::Bool=true,
+                         scale_penalty_with_samples::Bool=true,
                          penalize_intercept::Bool=false)
     return RobustRegression(HuberRho(delta), lambda, gamma;
                             penalty=penalty, fit_intercept=fit_intercept,
-                            penalize_intercept=penalize_intercept)
+                            penalize_intercept=penalize_intercept,
+                            scale_penalty_with_samples=scale_penalty_with_samples)
 end

 """
 $SIGNATURES

 Quantile Regression with objective:

-``∑ρ(Xθ - y) + λ|θ|₂²/2 + γ|θ|₁``
+``∑ρ(Xθ - y) + n⋅λ|θ|₂²/2 + n⋅γ|θ|₁``

-Where `ρ` is the check function `ρ(r) = r(δ - 1(r < 0))`.
+Where `ρ` is the check function `ρ(r) = r(δ - 1(r < 0))` and
+``n`` is the number of samples `size(X, 1)`.
+With `scale_penalty_with_samples = false` the objective function is
+``∑ρ(Xθ - y) + λ|θ|₂²/2 + γ|θ|₁``.
 """
 function QuantileRegression(δ::Real=0.5, λ::Real=1.0, γ::Real=0.0;
                             delta::Real=δ, lambda::Real=λ, gamma::Real=γ,
                             penalty::Symbol=iszero(gamma) ? :l2 : :en,
                             fit_intercept::Bool=true,
+                            scale_penalty_with_samples::Bool=true,
                             penalize_intercept::Bool=false)
     return RobustRegression(QuantileRho(delta), lambda, gamma;
                             penalty=penalty, fit_intercept=fit_intercept,
-                            penalize_intercept=penalize_intercept)
+                            penalize_intercept=penalize_intercept,
+                            scale_penalty_with_samples=scale_penalty_with_samples)
 end

 """
 $SIGNATURES

 Least Absolute Deviation regression with objective:

-``|Xθ - y|₁ + λ|θ|₂²/2 + γ|θ|₁``
+``|Xθ - y|₁ + n⋅λ|θ|₂²/2 + n⋅γ|θ|₁``
+where ``n`` is the number of samples `size(X, 1)`.
+With `scale_penalty_with_samples = false` the objective function is
+``|Xθ - y|₁ + λ|θ|₂²/2 + γ|θ|₁``.

 This is a specific type of Quantile Regression with `δ=0.5` (median).
 """
 function LADRegression(λ::Real=1.0, γ::Real=0.0;
                        lambda::Real=λ, gamma::Real=γ,
                        penalty::Symbol=iszero(gamma) ? :l2 : :en,
+                       scale_penalty_with_samples::Bool=true,
                        fit_intercept::Bool=true, penalize_intercept::Bool=false)
     return QuantileRegression(0.5, lambda, gamma;
                               penalty=penalty, fit_intercept=fit_intercept,
-                              penalize_intercept=penalize_intercept)
+                              penalize_intercept=penalize_intercept,
+                              scale_penalty_with_samples=scale_penalty_with_samples)
 end
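
Taken together, the constructor changes mean a user-supplied λ (and γ) is multiplied by the number of samples by default. A short usage sketch of the new keyword (the value 0.5 is illustrative):

    using MLJLinearModels

    # Default: the objective is |Xθ - y|₂²/2 + n⋅0.5⋅|θ|₁.
    lasso_scaled = LassoRegression(0.5)

    # Opt out to recover the pre-0.6 objective |Xθ - y|₂²/2 + 0.5⋅|θ|₁.
    lasso_raw = LassoRegression(0.5; scale_penalty_with_samples=false)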

src/glr/d_l2loss.jl

Lines changed: 2 additions & 2 deletions

@@ -14,7 +14,7 @@

 function Hv!(glr::GLR{L2Loss,<:L2R}, X, y, scratch)
     n, p = size(X)
-    λ = getscale(glr.penalty)
+    λ = get_penalty_scale(glr, n)
     if glr.fit_intercept
         # H = [X 1]'[X 1] + λ I
         # rows a 1:p = [X'X + λI | X'1]
@@ -61,7 +61,7 @@ end
 # ---------------------------------------------------------

 function smooth_fg!(glr::GLR{L2Loss,<:ENR}, X, y, scratch)
-    λ = getscale_l2(glr.penalty)
+    λ = get_penalty_scale_l2(glr, length(y))
     (g, θ) -> begin
         # cache contains the residuals (Xθ-y)
         r = scratch.n
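
The comments in `Hv!` describe the ridge Hessian with an intercept column, H = [X 1]'[X 1] + λI. A hedged, matrix-free sketch of the corresponding Hessian-vector product, again assuming the intercept is penalized for simplicity (`ridge_hv` is an illustrative name):

    # Illustrative product v ↦ Hv for H = [X 1]'[X 1] + λI, without
    # materializing H: Hv = Xa'(Xa*v) + λv with Xa = [X 1].
    function ridge_hv(X::AbstractMatrix, v::AbstractVector, λ::Real)
        Xv = X * v[1:end-1] .+ v[end]    # [X 1] * v
        return vcat(X' * Xv, sum(Xv)) .+ λ .* v
    end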

src/glr/d_logistic.jl

Lines changed: 6 additions & 6 deletions

@@ -12,9 +12,9 @@
 # ---------------------------------------------------------

 function fgh!(glr::GLR{LogisticLoss,<:L2R}, X, y, scratch)
-    J = objective(glr) # GLR objective (loss+penalty)
     n, p = size(X)
-    λ = getscale(glr.penalty)
+    J = objective(glr, n) # GLR objective (loss+penalty)
+    λ = get_penalty_scale(glr, n)
     if glr.fit_intercept
         (f, g, H, θ) -> begin
             = scratch.n
@@ -78,7 +78,7 @@ end

 function Hv!(glr::GLR{LogisticLoss,<:L2R}, X, y, scratch)
     n, p = size(X)
-    λ = getscale(glr.penalty)
+    λ = get_penalty_scale(glr, n)
     if glr.fit_intercept
         # H = [X 1]'Λ[X 1] + λ I
         # rows a 1:p = [X'ΛX + λI | X'Λ1]
@@ -155,7 +155,7 @@ end
 function fg!(glr::GLR{<:MultinomialLoss,<:L2R}, X, y, scratch)
     n, p = size(X)
     c = getc(glr, y)
-    λ = getscale(glr.penalty)
+    λ = get_penalty_scale(glr, n)
     (f, g, θ) -> begin
         P = scratch.nc
         apply_X!(P, X, θ, c, scratch) # O(npc) store n * c
@@ -208,8 +208,8 @@ function fg!(glr::GLR{<:MultinomialLoss,<:L2R}, X, y, scratch)
 end

 function Hv!(glr::GLR{<:MultinomialLoss,<:L2R}, X, y, scratch)
-    p = size(X, 2)
-    λ = getscale(glr.penalty)
+    n, p = size(X)
+    λ = get_penalty_scale(glr, n)
     c = getc(glr, y)
     # NOTE:
     # * ideally P and Q should be recuperated from gradient computations (fghv!)
