
Commit f4a38dd (2 parents: abf145f + 0e19298)

Merge pull request #113 from JuliaAI/dev
For a 0.6.1 release

24 files changed: +417 −198 lines

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion

@@ -17,7 +17,7 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.0'
+          - '1.5'
           - '1'
         os:
           - ubuntu-latest

Project.toml

Lines changed: 2 additions & 2 deletions

@@ -1,7 +1,7 @@
 name = "MLJLinearModels"
 uuid = "6ee0df7b-362f-4a72-a706-9e79364fb692"
 authors = ["Thibaut Lienart <[email protected]>"]
-version = "0.5.6"
+version = "0.6.1"

 [deps]
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
@@ -19,7 +19,7 @@ LinearMaps = "2.6, 3.2"
 MLJModelInterface = "0.3, 0.4, 1.0"
 Optim = "0.20, 0.21, 1"
 Parameters = "0.12"
-julia = "^1"
+julia = "1.5"

 [extras]
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"

src/fit/admm.jl

Lines changed: 2 additions & 2 deletions

@@ -9,7 +9,7 @@

 # function _fit(glr::GLR{L1Loss,<:L2R}, solver::ADMM, X, y)
 #     n, p = size(X)
-#     λ = getscale(glr.penalty)
+#     λ = get_penalty_scale(glr, n)
 #     φ = 1.0 / solver.rho
 #     λφ = λ * φ
 #     # pre-computations
@@ -63,7 +63,7 @@
 #
 # function _fit(glr::GLR{L1Loss,<:L2R}, solver::FADMM, X, y)
 #     n, p = size(X)
-#     λ = getscale(glr.penalty)
+#     λ = get_penalty_scale(glr, n)
 #     ρ = solver.rho
 #     η = solver.eta # linked to restart frequency
 #     τ = solver.tau # linked to updating ρ
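
Every solver file in this commit swaps `getscale(glr.penalty)` for `get_penalty_scale(glr, n)`, folding the new `scale_penalty_with_samples` flag (added in src/glr/constructors.jl below) into the penalty strength. The helper's definition is not part of this diff; a minimal sketch of what it plausibly computes, assuming `getscale` returns the raw penalty scale λ:

    # Hypothetical sketch: multiply the raw penalty scale by the sample
    # count n when the model was built with scale_penalty_with_samples=true.
    get_penalty_scale(glr, n) =
        getscale(glr.penalty) * ifelse(glr.scale_penalty_with_samples, n, 1)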

src/fit/analytical.jl

Lines changed: 2 additions & 2 deletions

@@ -18,13 +18,13 @@ Assuming `n` dominates `p`,
 function _fit(glr::GLR{L2Loss,<:L2R}, solver::Analytical, X, y, scratch)
     # full solve
     if !solver.iterative
-        λ = getscale(glr.penalty)
+        λ = get_penalty_scale(glr, length(y))
         if iszero(λ)
             # standard LS solution
             return augment_X(X, glr.fit_intercept) \ y
         else
             # Ridge case -- form the Hat Matrix then solve
-            H = form_XtX(X, glr.fit_intercept, λ)
+            H = form_XtX(X, glr.fit_intercept, λ, glr.penalize_intercept)
             b = X'y
             glr.fit_intercept && (b = vcat(b, sum(y)))
             return cholesky!(H) \ b
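
The ridge branch above forms the normal-equations system and solves it with a Cholesky factorization. A self-contained sketch of the same closed-form computation, with the simplifying assumption that the appended intercept column is penalized like the other coefficients (the package handles that separately via `penalize_intercept`; `ridge_solve` is an illustrative name, not the package API):

    using LinearAlgebra

    # Illustrative closed-form ridge solve: min |Xθ - y|₂²/2 + λ|θ|₂²/2.
    function ridge_solve(X::AbstractMatrix, y::AbstractVector, λ::Real)
        Xa = hcat(X, ones(size(X, 1)))   # augment with an intercept column
        H  = Xa' * Xa + λ * I            # system matrix X'X + λI
        return cholesky!(Symmetric(H)) \ (Xa' * y)
    end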

src/fit/iwls.jl

Lines changed: 0 additions & 1 deletion

@@ -1,6 +1,5 @@
 function _fit(glr::GLR{RobustLoss{ρ},<:L2R}, solver::IWLSCG, X, y, scratch
              ) where {ρ}
-    λ = getscale(glr.penalty)
     n,p,_ = npc(scratch)
     _Mv! = Mv!(glr, X, y, scratch; threshold=solver.threshold)
     κ = solver.damping # between 0 and 1, 1 = fully take the new iteration

src/fit/proxgrad.jl

Lines changed: 2 additions & 2 deletions

@@ -21,7 +21,7 @@ function _fit(glr::GLR, solver::ProxGrad, X, y, scratch)
     # functions
     _f = smooth_objective(glr, X, y; c=c)
     _fg! = smooth_fg!(glr, X, y, scratch)
-    _prox! = prox!(glr)
+    _prox! = prox!(glr, size(X, 1))
     bt_cond = θ̂ ->
         _f(θ̂) > fθ̄ + dot(θ̂ .- θ̄, ∇fθ̄) + sum(abs2.(θ̂ .- θ̄)) / (2η)
     # loop-related
@@ -48,7 +48,7 @@ function _fit(glr::GLR, solver::ProxGrad, X, y, scratch)
         end
         if inner == solver.max_inner
             @warn "No appropriate stepsize found via backtracking; " *
-                  "interrupting."
+                  "interrupting. The reason could be input data that is not standardized."
             break
         end
         # update caches
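
`prox!` now receives the sample count `size(X, 1)` so the proximal step applies the sample-scaled penalty n⋅λ. For an L1 penalty the proximal operator is soft-thresholding; a minimal sketch under that assumption (the package's actual `prox!` returns an in-place operator, so the names below are illustrative):

    # Illustrative prox step for P(θ) = n*λ*|θ|₁ with stepsize η:
    # elementwise soft-thresholding at level η*n*λ.
    soft_threshold(z, t) = sign(z) * max(abs(z) - t, zero(z))

    prox_l1!(θ, η, z, λ, n) = (θ .= soft_threshold.(z, η * n * λ))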

src/glr/constructors.jl

Lines changed: 71 additions & 25 deletions

@@ -14,6 +14,8 @@ Generalized Linear Regression (GLR) model with objective function:

 where `L` is a loss function, `P` a penalty, `y` is the vector of observed
 response, `X` is the feature matrix and `θ` the vector of parameters.
+If `scale_penalty_with_samples = true` (default) the penalty is automatically
+scaled with the number of samples.

 Special cases include:
@@ -28,6 +30,7 @@ Special cases include:
     penalty::P = NoPenalty()          # P(θ)
     fit_intercept::Bool = true        # add intercept ? def=true
     penalize_intercept::Bool = false
+    scale_penalty_with_samples::Bool = true
 end

 const GLR = GeneralizedLinearRegression
@@ -48,44 +51,59 @@ LinearRegression(; fit_intercept::Bool=true) = GLR(fit_intercept=fit_intercept)
 """
 $SIGNATURES

-Objective function: ``|Xθ - y|₂²/2 + λ|θ|₂²/2``.
+Objective function: ``|Xθ - y|₂²/2 + n⋅λ|θ|₂²/2``,
+where ``n`` is the number of samples `size(X, 1)`.
+With `scale_penalty_with_samples = false` the objective function is
+``|Xθ - y|₂²/2 + λ|θ|₂²/2``.
 """
 function RidgeRegression(λ::Real=1.0; lambda::Real=λ, fit_intercept::Bool=true,
-                         penalize_intercept::Bool=false)
+                         penalize_intercept::Bool=false,
+                         scale_penalty_with_samples::Bool=true)
     check_pos(lambda)
     GLR(penalty=lambda*L2Penalty(),
         fit_intercept=fit_intercept,
-        penalize_intercept=penalize_intercept)
+        penalize_intercept=penalize_intercept,
+        scale_penalty_with_samples=scale_penalty_with_samples)
 end


 """
 $SIGNATURES

-Objective function: ``|Xθ - y|₂²/2 + λ|θ|₁``.
+Objective function: ``|Xθ - y|₂²/2 + n⋅λ|θ|₁``,
+where ``n`` is the number of samples `size(X, 1)`.
+With `scale_penalty_with_samples = false` the objective function is
+``|Xθ - y|₂²/2 + λ|θ|₁``
 """
 function LassoRegression(λ::Real=1.0; lambda::Real=λ, fit_intercept::Bool=true,
-                         penalize_intercept::Bool=false)
+                         penalize_intercept::Bool=false,
+                         scale_penalty_with_samples::Bool=true)
     check_pos(lambda)
     GLR(penalty=lambda*L1Penalty(),
         fit_intercept=fit_intercept,
-        penalize_intercept=penalize_intercept)
+        penalize_intercept=penalize_intercept,
+        scale_penalty_with_samples=scale_penalty_with_samples)
 end


 """
 $SIGNATURES

-Objective function: ``|Xθ - y|₂²/2 + λ|θ|₂²/2 + γ|θ|₁``.
+Objective function: ``|Xθ - y|₂²/2 + n⋅λ|θ|₂²/2 + n⋅γ|θ|₁``,
+where ``n`` is the number of samples `size(X, 1)`.
+With `scale_penalty_with_samples = false` the objective function is
+``|Xθ - y|₂²/2 + λ|θ|₂²/2 + γ|θ|₁``
 """
 function ElasticNetRegression(λ::Real=1.0, γ::Real=1.0;
                               lambda::Real=λ, gamma::Real=γ,
                               fit_intercept::Bool=true,
-                              penalize_intercept::Bool=false)
+                              penalize_intercept::Bool=false,
+                              scale_penalty_with_samples::Bool=true)
     check_pos.((lambda, gamma))
     GLR(penalty=lambda*L2Penalty()+gamma*L1Penalty(),
         fit_intercept=fit_intercept,
-        penalize_intercept=penalize_intercept)
+        penalize_intercept=penalize_intercept,
+        scale_penalty_with_samples=scale_penalty_with_samples)
 end


@@ -114,14 +132,18 @@ end
 """
 $SIGNATURES

-Objective function: ``L(y, Xθ) + λ|θ|₂²/2 + γ|θ|₁`` where `L` is either the
-logistic loss in the binary case or the multinomial loss otherwise.
+Objective function: ``L(y, Xθ) + n⋅λ|θ|₂²/2 + n⋅γ|θ|₁`` where `L` is either the
+logistic loss in the binary case or the multinomial loss otherwise and
+``n`` is the number of samples `size(X, 1)`.
+With `scale_penalty_with_samples = false` the objective function is
+``L(y, Xθ) + λ|θ|₂²/2 + γ|θ|₁``.
 """
 function LogisticRegression(λ::Real=1.0, γ::Real=0.0;
                             lambda::Real=λ, gamma::Real=γ,
                             penalty::Symbol=iszero(gamma) ? :l2 : :en,
                             fit_intercept::Bool=true,
                             penalize_intercept::Bool=false,
+                            scale_penalty_with_samples::Bool=true,
                             multi_class::Bool=false,
                             nclasses::Integer=0)
     penalty = _l1l2en(lambda, gamma, penalty, "Logistic regression")
@@ -134,14 +156,18 @@ function LogisticRegression(λ::Real=1.0, γ::Real=0.0;
     GLR(loss=loss,
         penalty=penalty,
         fit_intercept=fit_intercept,
-        penalize_intercept=penalize_intercept)
+        penalize_intercept=penalize_intercept,
+        scale_penalty_with_samples=scale_penalty_with_samples)
 end

 """
 $SIGNATURES

-Objective function: ``L(y, Xθ) + λ|θ|₂²/2 + γ|θ|₁`` where `L` is the
-multinomial loss.
+Objective function: ``L(y, Xθ) + n⋅λ|θ|₂²/2 + n⋅γ|θ|₁`` where `L` is the
+multinomial loss and
+``n`` is the number of samples `size(X, 1)`.
+With `scale_penalty_with_samples = false` the objective function is
+``L(y, Xθ) + λ|θ|₂²/2 + γ|θ|₁``.
 """
 MultinomialRegression(a...; kwa...) =
     LogisticRegression(a...; multi_class=true, kwa...)
@@ -152,74 +178,94 @@ MultinomialRegression(a...; kwa...) =
 """
 $SIGNATURES

-Objective function: ``∑ρ(Xθ - y) + λ|θ|₂² + γ|θ|₁`` where ρ is a given function
-on the residuals.
+Objective function: ``∑ρ(Xθ - y) + n⋅λ|θ|₂² + n⋅γ|θ|₁`` where ρ is a given function
+on the residuals and
+``n`` is the number of samples `size(X, 1)`.
+With `scale_penalty_with_samples = false` the objective function is
+``∑ρ(Xθ - y) + λ|θ|₂² + γ|θ|₁``.
 """
 function RobustRegression(ρ::RobustRho=HuberRho(0.1), λ::Real=1.0, γ::Real=0.0;
                           rho::RobustRho=ρ, lambda::Real=λ, gamma::Real=γ,
                           penalty::Symbol=iszero(gamma) ? :l2 : :en,
                           fit_intercept::Bool=true,
+                          scale_penalty_with_samples::Bool=true,
                           penalize_intercept::Bool=false)
     penalty = _l1l2en(lambda, gamma, penalty, "Robust regression")
     GLR(loss=RobustLoss(rho),
         penalty=penalty,
         fit_intercept=fit_intercept,
-        penalize_intercept=penalize_intercept)
+        penalize_intercept=penalize_intercept,
+        scale_penalty_with_samples=scale_penalty_with_samples)
 end

 """
 $SIGNATURES

 Huber Regression with objective:

-``∑ρ(Xθ - y) + λ|θ|₂²/2 + γ|θ|₁``
+``∑ρ(Xθ - y) + n⋅λ|θ|₂²/2 + n⋅γ|θ|₁``

 Where `ρ` is the Huber function `ρ(r) = r²/2` if `|r|≤δ` and
-`ρ(r)=δ(|r|-δ/2)` otherwise.
+`ρ(r)=δ(|r|-δ/2)` otherwise and
+``n`` is the number of samples `size(X, 1)`.
+With `scale_penalty_with_samples = false` the objective function is
+``∑ρ(Xθ - y) + λ|θ|₂²/2 + γ|θ|₁``.
 """
 function HuberRegression(δ::Real=0.5, λ::Real=1.0, γ::Real=0.0;
                          delta::Real=δ, lambda::Real=λ, gamma::Real=γ,
                          penalty::Symbol=iszero(gamma) ? :l2 : :en,
                          fit_intercept::Bool=true,
+                         scale_penalty_with_samples::Bool=true,
                          penalize_intercept::Bool=false)
     return RobustRegression(HuberRho(delta), lambda, gamma;
                             penalty=penalty, fit_intercept=fit_intercept,
-                            penalize_intercept=penalize_intercept)
+                            penalize_intercept=penalize_intercept,
+                            scale_penalty_with_samples=scale_penalty_with_samples)
 end

 """
 $SIGNATURES

 Quantile Regression with objective:

-``∑ρ(Xθ - y) + λ|θ|₂²/2 + γ|θ|₁``
+``∑ρ(Xθ - y) + n⋅λ|θ|₂²/2 + n⋅γ|θ|₁``

-Where `ρ` is the check function `ρ(r) = r(δ - 1(r < 0))`.
+Where `ρ` is the check function `ρ(r) = r(δ - 1(r < 0))` and
+``n`` is the number of samples `size(X, 1)`.
+With `scale_penalty_with_samples = false` the objective function is
+``∑ρ(Xθ - y) + λ|θ|₂²/2 + γ|θ|₁``.
 """
 function QuantileRegression(δ::Real=0.5, λ::Real=1.0, γ::Real=0.0;
                             delta::Real=δ, lambda::Real=λ, gamma::Real=γ,
                             penalty::Symbol=iszero(gamma) ? :l2 : :en,
                             fit_intercept::Bool=true,
+                            scale_penalty_with_samples::Bool=true,
                             penalize_intercept::Bool=false)
     return RobustRegression(QuantileRho(delta), lambda, gamma;
                             penalty=penalty, fit_intercept=fit_intercept,
-                            penalize_intercept=penalize_intercept)
+                            penalize_intercept=penalize_intercept,
+                            scale_penalty_with_samples=scale_penalty_with_samples)
 end

 """
 $SIGNATURES

 Least Absolute Deviation regression with objective:

-``|Xθ - y|₁ + λ|θ|₂²/2 + γ|θ|₁``
+``|Xθ - y|₁ + n⋅λ|θ|₂²/2 + n⋅γ|θ|₁``
+where ``n`` is the number of samples `size(X, 1)`.
+With `scale_penalty_with_samples = false` the objective function is
+``|Xθ - y|₁ + λ|θ|₂²/2 + γ|θ|₁``.

 This is a specific type of Quantile Regression with `δ=0.5` (median).
 """
 function LADRegression(λ::Real=1.0, γ::Real=0.0;
                        lambda::Real=λ, gamma::Real=γ,
                        penalty::Symbol=iszero(gamma) ? :l2 : :en,
+                       scale_penalty_with_samples::Bool=true,
                        fit_intercept::Bool=true, penalize_intercept::Bool=false)
     return QuantileRegression(0.5, lambda, gamma;
                               penalty=penalty, fit_intercept=fit_intercept,
-                              penalize_intercept=penalize_intercept)
+                              penalize_intercept=penalize_intercept,
+                              scale_penalty_with_samples=scale_penalty_with_samples)
 end
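
Taken together, the constructor changes mean a user-supplied λ (and γ) is multiplied by the number of samples by default. A short usage sketch of the new keyword (the value 0.5 is illustrative):

    using MLJLinearModels

    # Default: the objective is |Xθ - y|₂²/2 + n⋅0.5⋅|θ|₁.
    lasso_scaled = LassoRegression(0.5)

    # Opt out to recover the pre-0.6 objective |Xθ - y|₂²/2 + 0.5⋅|θ|₁.
    lasso_raw = LassoRegression(0.5; scale_penalty_with_samples=false)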

src/glr/d_l2loss.jl

Lines changed: 2 additions & 2 deletions

@@ -14,7 +14,7 @@

 function Hv!(glr::GLR{L2Loss,<:L2R}, X, y, scratch)
     n, p = size(X)
-    λ = getscale(glr.penalty)
+    λ = get_penalty_scale(glr, n)
     if glr.fit_intercept
         # H = [X 1]'[X 1] + λ I
         # rows a 1:p = [X'X + λI | X'1]
@@ -61,7 +61,7 @@ end
 # ---------------------------------------------------------

 function smooth_fg!(glr::GLR{L2Loss,<:ENR}, X, y, scratch)
-    λ = getscale_l2(glr.penalty)
+    λ = get_penalty_scale_l2(glr, length(y))
     (g, θ) -> begin
         # cache contains the residuals (Xθ-y)
         r = scratch.n
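
The comments in `Hv!` describe the ridge Hessian with an intercept column, H = [X 1]'[X 1] + λI. A hedged, matrix-free sketch of the corresponding Hessian-vector product, again assuming the intercept is penalized for simplicity (`ridge_hv` is an illustrative name):

    # Illustrative product v ↦ Hv for H = [X 1]'[X 1] + λI, without
    # materializing H: Hv = Xa'(Xa*v) + λv with Xa = [X 1].
    function ridge_hv(X::AbstractMatrix, v::AbstractVector, λ::Real)
        Xv = X * v[1:end-1] .+ v[end]    # [X 1] * v
        return vcat(X' * Xv, sum(Xv)) .+ λ .* v
    end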

src/glr/d_logistic.jl

Lines changed: 6 additions & 6 deletions

@@ -12,9 +12,9 @@
 # ---------------------------------------------------------

 function fgh!(glr::GLR{LogisticLoss,<:L2R}, X, y, scratch)
-    J = objective(glr) # GLR objective (loss+penalty)
     n, p = size(X)
-    λ = getscale(glr.penalty)
+    J = objective(glr, n) # GLR objective (loss+penalty)
+    λ = get_penalty_scale(glr, n)
     if glr.fit_intercept
         (f, g, H, θ) -> begin
             = scratch.n
@@ -78,7 +78,7 @@ end

 function Hv!(glr::GLR{LogisticLoss,<:L2R}, X, y, scratch)
     n, p = size(X)
-    λ = getscale(glr.penalty)
+    λ = get_penalty_scale(glr, n)
     if glr.fit_intercept
         # H = [X 1]'Λ[X 1] + λ I
         # rows a 1:p = [X'ΛX + λI | X'Λ1]
@@ -155,7 +155,7 @@ end
 function fg!(glr::GLR{<:MultinomialLoss,<:L2R}, X, y, scratch)
     n, p = size(X)
     c = getc(glr, y)
-    λ = getscale(glr.penalty)
+    λ = get_penalty_scale(glr, n)
     (f, g, θ) -> begin
         P = scratch.nc
         apply_X!(P, X, θ, c, scratch) # O(npc) store n * c
@@ -208,8 +208,8 @@ function fg!(glr::GLR{<:MultinomialLoss,<:L2R}, X, y, scratch)
 end

 function Hv!(glr::GLR{<:MultinomialLoss,<:L2R}, X, y, scratch)
-    p = size(X, 2)
-    λ = getscale(glr.penalty)
+    n, p = size(X)
+    λ = get_penalty_scale(glr, n)
     c = getc(glr, y)
     # NOTE:
     # * ideally P and Q should be recuperated from gradient computations (fghv!)
