
Commit abf145f: Merge branch 'dev' (2 parents: 56324b5 + 47c0415)

File tree: 8 files changed, +82 -40 lines

Project.toml
Lines changed: 3 additions & 2 deletions

```diff
@@ -1,7 +1,7 @@
 name = "MLJLinearModels"
 uuid = "6ee0df7b-362f-4a72-a706-9e79364fb692"
 authors = ["Thibaut Lienart <[email protected]>"]
-version = "0.5.5"
+version = "0.5.6"

 [deps]
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
@@ -24,6 +24,7 @@ julia = "^1"
 [extras]
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
+ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
 PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
 RCall = "6f49c342-dc21-5d91-9882-a32aef131414"
@@ -33,4 +34,4 @@ StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

 [targets]
-test = ["DelimitedFiles", "PyCall", "Test", "Random", "RDatasets", "RCall", "MLJBase", "StableRNGs", "DataFrames"]
+test = ["DelimitedFiles", "PyCall", "ForwardDiff", "Test", "Random", "RDatasets", "RCall", "MLJBase", "StableRNGs", "DataFrames"]
```

README.md
Lines changed: 3 additions & 3 deletions

```diff
@@ -2,7 +2,7 @@

 | [Linux] | Coverage | Documentation |
 | :------------ | :------- | :------------ |
-| [![Build Status](https://github.com/alan-turing-institute/MLJLinearModels.jl/workflows/CI/badge.svg)](https://github.com/alan-turing-institute/MLJLinearModels.jl/actions) | [![codecov.io](http://codecov.io/github/alan-turing-institute/MLJLinearModels.jl/coverage.svg?branch=master)](http://codecov.io/github/alan-turing-institute/MLJLinearModels.jl?branch=master) | [![stable-doc](https://img.shields.io/badge/docs-stable-blue.svg)](https://alan-turing-institute.github.io/MLJLinearModels.jl/stable/) [![dev-doc](https://img.shields.io/badge/docs-dev-blue.svg)](https://alan-turing-institute.github.io/MLJLinearModels.jl/dev/) |
+| [![Build Status](https://github.com/JuliaAI/MLJLinearModels.jl/workflows/CI/badge.svg)](https://github.com/JuliaAI/MLJLinearModels.jl/actions) | [![codecov.io](http://codecov.io/github/JuliaAI/MLJLinearModels.jl/coverage.svg?branch=master)](http://codecov.io/github/JuliaAI/MLJLinearModels.jl?branch=master) | [![stable-doc](https://img.shields.io/badge/docs-stable-blue.svg)](https://JuliaAI.github.io/MLJLinearModels.jl/stable/) [![dev-doc](https://img.shields.io/badge/docs-dev-blue.svg)](https://JuliaAI.github.io/MLJLinearModels.jl/dev/) |

 This is a package gathering functionalities to solve a number of generalised linear regression/classification problems which, inherently, correspond to an optimisation problem of the form

@@ -20,7 +20,7 @@ The core aims of this package are:
 - focus on performance including in "big data" settings exploiting packages such as [`Optim.jl`](https://github.com/JuliaNLSolvers/Optim.jl), [`IterativeSolvers.jl`](https://github.com/JuliaMath/IterativeSolvers.jl),
 - use a "machine learning" perspective, i.e.: focus essentially on prediction, hyper-parameters should be obtained via a data-driven procedure such as cross-validation.

-Head to the [quickstart section of the docs](https://alan-turing-institute.github.io/MLJLinearModels.jl/dev/quickstart/) to see how to use this package.
+Head to the [quickstart section of the docs](https://JuliaAI.github.io/MLJLinearModels.jl/dev/quickstart/) to see how to use this package.

 # NOTES

@@ -63,7 +63,7 @@ Unless otherwise specified:
 - (_against [scikit-learn](https://scikit-learn.org/)_): Lasso, Elastic-Net, Logistic (L1/L2/EN), Multinomial (L1/L2/EN)
 - (_against [quantreg](https://cran.r-project.org/web/packages/quantreg/index.html)_): Quantile (0/L1)

-Systematic timing benchmarks have not been run yet but it's planned (see [this issue](https://github.com/alan-turing-institute/MLJLinearModels.jl/issues/14)).
+Systematic timing benchmarks have not been run yet but it's planned (see [this issue](https://github.com/JuliaAI/MLJLinearModels.jl/issues/14)).

 ### Current limitations
```
src/glr/constructors.jl
Lines changed: 1 addition & 1 deletion

```diff
@@ -32,7 +32,7 @@ end

 const GLR = GeneralizedLinearRegression

-getc(g::GLR) = getc(g.loss)
+getc(g::GLR)    = getc(g.loss)
 getc(g::GLR, y) = getc(g.loss, y)

 ## Specific constructors
```

src/glr/d_l2loss.jl
Lines changed: 1 addition & 1 deletion

```diff
@@ -5,7 +5,7 @@
 # ----------------------- #
 #  -- Ridge Regression -- #
 # ----------------------- #
-# ->  f(θ)   = |Xθ - y|₂²/2 + λ|θ|₂²
+# ->  f(θ)   = |Xθ - y|₂²/2 + λ|θ|₂²/2
 # -> ∇f(θ)   = X'(Xθ - y) + λθ
 # -> ∇²f(θ)  = X'X + λI
 # NOTE:
```
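
The only change here is the missing factor of a half on the penalty: with f(θ) = |Xθ - y|₂²/2 + λ|θ|₂²/2 the stated gradient X'(Xθ - y) + λθ is exact, whereas the old comment implied a penalty gradient of 2λθ. A minimal standalone check of the corrected formulas against automatic differentiation, in the same spirit as the tests this commit adds (the data and names below are illustrative, not package code):

```julia
using LinearAlgebra, ForwardDiff

X, y = randn(30, 4), randn(30)
λ, θ = 0.5, randn(4)

# f(θ) = |Xθ - y|₂²/2 + λ|θ|₂²/2, as in the corrected comment
f(θ_) = sum(abs2, X * θ_ .- y) / 2 + λ * sum(abs2, θ_) / 2

g = X' * (X * θ .- y) .+ λ .* θ    # ∇f(θ)  = X'(Xθ - y) + λθ
H = X' * X + λ * I                 # ∇²f(θ) = X'X + λI

@assert g ≈ ForwardDiff.gradient(f, θ)
@assert H ≈ ForwardDiff.hessian(f, θ)
```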

src/glr/d_logistic.jl
Lines changed: 31 additions & 23 deletions

```diff
@@ -1,12 +1,14 @@
 # ------------------------------- #
 #  -- Logistic Regression (L2) -- #
 # ------------------------------- #
-# ->  f(θ)  = -∑logσ(yXθ) + λ|θ|₂²
-# -> ∇f(θ)  = -X'(yσ(-yXθ)) + λθ
-# -> ∇²f(θ) = X'(σ(yXθ))X + λI
+# ->  f(θ)  = -∑logσ(yXθ) + λ|θ|₂²/2
+# -> ∇f(θ)  = X'(y(w-1)) + λθ
+# -> ∇²f(θ) = X' Diag(w(1-w)) X + λI
 # NOTE:
+#   * w = σ(yXθ)
 #   * yᵢ ∈ {±1} so that y² = 1
-#   * -σ(-x) ==(σ(x)-1)
+#   * -σ(-x) == (σ(x)-1)
+# NOTE: https://github.com/JuliaAI/MLJLinearModels.jl/issues/104
 # ---------------------------------------------------------

 function fgh!(glr::GLR{LogisticLoss,<:L2R}, X, y, scratch)
@@ -17,12 +19,12 @@ function fgh!(glr::GLR{LogisticLoss,<:L2R}, X, y, scratch)
     (f, g, H, θ) -> begin
         Xθ = scratch.n
         apply_X!(Xθ, X, θ)                 # -- Xθ = apply_X(X, θ)
-        # precompute σ(yXθ) use -σ(-x) = (σ(x)-1)
-        w = scratch.n2
-        w .= σ.(Xθ .* y)                   # -- w = σ.(Xθ .* y)
+        # precompute σ(yXθ)
+        w  = scratch.n2
+        w .= σ.(Xθ .* y)                   # -- w = σ.(Xθ .* y)
         g === nothing || begin
             t = scratch.n3
-            t .= y .* (w .- 1.0)           # -- t = y .* (w .- 1.0)
+            t .= y .* w .- y               # -- t = y .* (w .- 1.0)
             apply_Xt!(g, X, t)             # -- g = X't
             g .+= λ .* θ
             glr.penalize_intercept || (g[end] -= λ * θ[end])
@@ -31,19 +33,21 @@
             # NOTE: we could try to be clever to reduce the allocations for
             # ΛX but computing the full hessian allocates a lot anyway so
             # probably not really worth it
-            a  = 1:p
-            ΛX = w .* X                              # !! big allocs
-            mul!(view(H, a, a), X', ΛX)              # -- H[1:p,1:p] = X'ΛX
+            t  = scratch.n3
+            t .= w .- w.^2                           # σ(yXθ)(1-σ(yXθ))
+            a  = 1:p
+            ΛX = t .* X                              # !! big allocs
+            mul!(view(H, a, a), X', ΛX)              # -- H[1:p,1:p] = X'ΛX
             ΛXt1 = view(scratch.p, a)
             ΛXt1 .*= 0
             @inbounds for i in a, j in 1:n
-                ΛXt1[i] += ΛX[j, i]                  # -- (ΛX)'1
+                ΛXt1[i] += ΛX[j, i]                  # -- (ΛX)'1
             end
             @inbounds for i in a
                 H[i, end] = H[end, i] = ΛXt1[i]      # -- H[:,p+1] = (ΛX)'1
             end
-            H[end, end] = sum(w)                     # -- 1'Λ1
-            add_λI!(H, λ, glr.penalize_intercept)    # -- H = X'ΛX + λI
+            H[end, end] = sum(t)                     # -- 1'Λ1
+            add_λI!(H, λ, glr.penalize_intercept)    # -- H = X'ΛX + λI
         end
         f === nothing || return J(y, Xθ, view_θ(glr, θ))
     end
@@ -53,16 +57,18 @@ function fgh!(glr::GLR{LogisticLoss,<:L2R}, X, y, scratch)
     (f, g, H, θ) -> begin
         Xθ = scratch.n
         apply_X!(Xθ, X, θ)
-        w = scratch.n2
-        w .= σ.(y .* Xθ)
+        w  = scratch.n2
+        w .= σ.(y .* Xθ)
         g === nothing || begin
             t = scratch.n3
-            t .= y .* (w .- 1.0)
+            t .= y .* w .- y
             apply_Xt!(g, X, t)
             g .+= λ .* θ
         end
         H === nothing || begin
-            mul!(H, X', w .* X)
+            t  = scratch.n3
+            t .= w .- w.^2
+            mul!(H, X', t .* X)
             add_λI!(H, λ)
         end
         f === nothing || return J(y, Xθ, θ)
@@ -80,8 +86,9 @@ function Hv!(glr::GLR{LogisticLoss,<:L2R}, X, y, scratch)
     (Hv, θ, v) -> begin
         Xθ = scratch.n
         apply_X!(Xθ, X, θ)                 # -- Xθ = apply_X(X, θ)
-        w = scratch.n2
-        w .= σ.(Xθ .* y)                   # -- w = σ.(Xθ .* y)
+        w  = scratch.n2
+        w .= σ.(Xθ .* y)                   # -- w = σ.(Xθ .* y)
+        w .-= w.^2                         # -- w = w(1-w)
         # view on the first p rows
         a = 1:p
         Hvₐ = view(Hv, a)
@@ -103,9 +110,10 @@ function Hv!(glr::GLR{LogisticLoss,<:L2R}, X, y, scratch)
     (Hv, θ, v) -> begin
         Xθ = scratch.n
        apply_X!(Xθ, X, θ)
-        w = scratch.n2
-        w .= σ.(Xθ .* y)                   # -- σ(yXθ)
-        Xv = scratch.n
+        w  = scratch.n2
+        w .= σ.(Xθ .* y)                   # -- σ(yXθ)
+        w .-= w.^2
+        Xv = scratch.n
         mul!(Xv, X, v)
         Xv .*= scratch.n2                  # -- ΛXv
         mul!(Hv, X', Xv)                   # -- X'ΛXv
```
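
The substantive fix in this file is the Hessian weight. Since (logσ)'(z) = σ(z) - 1 and σ'(z) = σ(z)(1 - σ(z)), and y² = 1, the per-sample curvature weight is w(1 - w) with w = σ(yXθ), not w as the old code (`ΛX = w .* X`, `mul!(H, X', w .* X)`) had it; see the issue #104 link added in the header. The gradient was already correct; only the Hessian and the Hessian-vector products needed the corrected Λ. A standalone numerical confirmation of the new formulas (σ and the data are defined locally here; this is not package code):

```julia
using LinearAlgebra, ForwardDiff

σ(z) = 1 / (1 + exp(-z))
X, y = randn(40, 3), rand([-1.0, 1.0], 40)
λ, θ = 0.1, randn(3)

# f(θ) = -∑ logσ(yXθ) + λ|θ|₂²/2, using logσ(z) = -log1p(exp(-z))
f(θ_) = sum(z -> log1p(exp(-z)), y .* (X * θ_)) + λ * sum(abs2, θ_) / 2

w = σ.(y .* (X * θ))
g = X' * (y .* (w .- 1)) .+ λ .* θ               # ∇f  = X'(y(w-1)) + λθ
H = X' * Diagonal(w .* (1 .- w)) * X + λ * I     # ∇²f = X'Diag(w(1-w))X + λI

@assert g ≈ ForwardDiff.gradient(f, θ)
@assert H ≈ ForwardDiff.hessian(f, θ)   # fails with Diagonal(w), the old weight
```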

test/fit/quantile.jl
Lines changed: 1 addition & 1 deletion

```diff
@@ -91,7 +91,7 @@ y1a = outlify(y1, 0.1)
 θ_ista = fit(rr, X, y1a, solver=ISTA())
 θ_qr_lasso = rcopy(QUANTREG.rq_fit_lasso(X1, y1a))[:coefficients]
 @test isapprox(J(θ_ls), 888.3748, rtol=1e-5)
-@test isapprox(J(θ_qr_lasso), 425.5977, rtol=1e-5)
+@test isapprox(J(θ_qr_lasso), 425.5264, rtol=1e-5)
 # Our algorithms are close enough
 @test isapprox(J(θ_fista), 425.0526, rtol=1e-5)
 @test isapprox(J(θ_ista), 425.4113, rtol=1e-5)
```

test/glr/grad-hess-prox.jl
Lines changed: 40 additions & 8 deletions

```diff
@@ -72,6 +72,10 @@ end
     @test g ≈ X1' * (X1*θ1 .- y1) .+ λ .* θ1 .* vcat(ones(p), 0)
 end

+n, p = 50, 5
+((X, y, θ), (X1, y1, θ1)) = generate_binary(n, p; seed=1212224)
+maskint = vcat(ones(p), 0.0)
+
 @testset "GH> LogitL2" begin
     rng = StableRNG(551551)
     # fgh! without fit_intercept
@@ -85,17 +89,29 @@ end
     g = similar(θ)
     H = zeros(p, p)
     f = fgh!(f, g, H, θ)
+
+    wminus = R.σ.(-y .* (X * θ))
+    wplus  = R.σ.(y .* (X * θ))
+
     @test f == J(θ)
-    @test g ≈ -X' * (y .* R.σ.(-y .* (X * θ))) .+ λ .* θ
-    @test H ≈ X' * (Diagonal(R.σ.(y .* (X * θ))) * X) + λ * I
+    @test g ≈ -X' * (y .* wminus) .+ λ .* θ
+    @test H ≈ X' * (Diagonal(wplus .* (1 .- wplus)) * X) + λ * I
+
+    # Use ForwardDiff
+    logsigmoid(x) = -log1p(exp(-x))
+    objective2(θ_) = -sum(logsigmoid.(y .* (X*θ_))) + λ * sum(abs2, θ_) / 2
+    fd_grad = ForwardDiff.gradient(θ_ -> objective2(θ_), θ)
+    fd_hess = ForwardDiff.hessian(θ_ -> objective2(θ_), θ)
+    @test g ≈ fd_grad
+    @test H ≈ fd_hess

     # Hv! without fit_intercept
     s = R.scratch(X; i=false)
     Hv! = R.Hv!(lr, X, y, s)
     v = randn(rng, p)
     Hv = similar(v)
     Hv!(Hv, θ, v)
-    @test Hv ≈ H * v
+    @test Hv ≈ H * v

     # fgh! with fit_intercept
     s = R.scratch(X; i=true)
@@ -108,16 +124,28 @@ end
     g1 = similar(θ1)
     H1 = zeros(p+1, p+1)
     f1 = fgh!(f1, g1, H1, θ1)
+
+    wminus = R.σ.(-y .* (X1 * θ1))
+    wplus  = R.σ.(y .* (X1 * θ1))
+
     @test f1 ≈ J(θ1)
-    @test g1 ≈ -X1' * (y .* R.σ.(-y .* (X1 * θ1))) .+ λ .* θ1
-    @test H1 ≈ X1' * (Diagonal(R.σ.(y .* (X1 * θ1))) * X1) + λ * I
+    @test g1 ≈ -X1' * (y .* wminus) .+ λ .* θ1
+    @test H1 ≈ X1' * (Diagonal(wplus .* (1 .- wplus)) * X1) + λ * I
+
+    # Use ForwardDiff
+    logsigmoid(x) = -log1p(exp(-x))
+    objective2(θ_) = -sum(logsigmoid.(y .* (X1*θ_))) + λ * sum(abs2, θ_) / 2
+    fd_grad = ForwardDiff.gradient(θ_ -> objective2(θ_), θ1)
+    fd_hess = ForwardDiff.hessian(θ_ -> objective2(θ_), θ1)
+    @test g1 ≈ fd_grad
+    @test H1 ≈ fd_hess

     # Hv! with fit_intercept
     Hv! = R.Hv!(lr1, X, y, s)
     v = randn(rng, p+1)
     Hv = similar(v)
     Hv!(Hv, θ1, v)
-    @test Hv ≈ H1 * v
+    @test Hv ≈ H1 * v

     # fgh! with fit intercept and no penalty on intercept
     lr1 = LogisticRegression(λ)
@@ -128,9 +156,13 @@ end
     g1 = similar(θ1)
     H1 = zeros(p+1, p+1)
     f1 = fgh!(f1, g1, H1, θ1)
+
+    wminus = R.σ.(-y .* (X1 * θ1))
+    wplus  = R.σ.(y .* (X1 * θ1))
+
     @test f1 ≈ J(θ1)
-    @test g1 ≈ -X1' * (y .* R.σ.(-y .* (X1 * θ1))) .+ λ .* θ1 .* maskint
-    @test H1 ≈ X1' * (Diagonal(R.σ.(y .* (X1 * θ1))) * X1) + λ * Diagonal(maskint)
+    @test g1 ≈ -X1' * (y .* wminus) .+ λ .* θ1 .* maskint
+    @test H1 ≈ X1' * (Diagonal(wplus .* (1.0 .- wplus)) * X1) + λ * Diagonal(maskint)
     Hv! = R.Hv!(lr1, X, y, s)
     v = randn(rng, p+1)
     Hv = similar(v)
```
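
The `Hv!` tests above validate the matrix-free path against an explicitly assembled Hessian. The point of `Hv!` is that H·v for L2-penalised logistic regression factors as X'(Λ(Xv)) + λv: three matrix-vector passes that never form the p×p Hessian or the n×p intermediate ΛX. A minimal sketch of that identity (standalone; σ and the variable names are local to this snippet, not the package's API):

```julia
using LinearAlgebra

σ(z) = 1 / (1 + exp(-z))
X, y = randn(40, 3), rand([-1.0, 1.0], 40)
λ, θ, v = 0.1, randn(3), randn(3)

w = σ.(y .* (X * θ))
w .-= w .^ 2                          # w now holds σ(yXθ)(1 - σ(yXθ))

# matrix-free product: Xv, then Λ(Xv), then X'(ΛXv); H is never materialised
Hv = X' * (w .* (X * v)) .+ λ .* v

# dense Hessian, for comparison only
H = X' * Diagonal(w) * X + λ * I
@assert Hv ≈ H * v
```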

test/runtests.jl
Lines changed: 2 additions & 1 deletion

```diff
@@ -1,4 +1,5 @@
-using MLJLinearModels, Test, LinearAlgebra, Random, StableRNGs, DataFrames
+using MLJLinearModels, Test, LinearAlgebra
+using Random, StableRNGs, DataFrames, ForwardDiff
 import MLJBase # not MLJModelInterface, to mimic the full interface

 DO_COMPARISONS = false; include("testutils.jl")
```
