
Commit 266c5fc (merge commit, 2 parents: 78e7586 + 9427a2b)

18 files changed: +404 additions, -395 deletions

.travis.yml

Lines changed: 4 additions & 3 deletions
@@ -4,10 +4,11 @@ os:
   - linux
 julia:
   - 1.0
-  - 1.1
-  - 1.2
-  - 1.3
+  - 1.4
   - nightly
+matrix:
+  allow_failures:
+    - julia: nightly
 notifications:
   email: false
 after_success:

Project.toml

Lines changed: 5 additions & 4 deletions
@@ -1,7 +1,7 @@
 name = "MLJLinearModels"
 uuid = "6ee0df7b-362f-4a72-a706-9e79364fb692"
 authors = ["Thibaut Lienart <[email protected]>"]
-version = "0.3.1"
+version = "0.3.3"
 
 [deps]
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
@@ -16,8 +16,8 @@ Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
 DocStringExtensions = "^0.8"
 IterativeSolvers = "^0.8"
 LinearMaps = "^2.6"
-MLJModelInterface = "^0.1"
-Optim = "^0.20"
+MLJModelInterface = "^0.1,^0.2"
+Optim = "^0.20,^0.21"
 Parameters = "^0.12"
 julia = "^1"
 
@@ -28,7 +28,8 @@ PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
 RCall = "6f49c342-dc21-5d91-9882-a32aef131414"
 RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["DelimitedFiles", "PyCall", "Test", "Random", "RDatasets", "RCall", "MLJBase"]
+test = ["DelimitedFiles", "PyCall", "Test", "Random", "RDatasets", "RCall", "MLJBase", "StableRNGs"]

src/fit/analytical.jl

Lines changed: 28 additions & 28 deletions
This hunk is whitespace-only: the docstring bullets and the body of `_fit` are reindented, with no change to their content (each removed line reappears with adjusted leading whitespace). After the change, the affected lines read:

@@ -11,35 +11,35 @@ Fit a least square regression either with no penalty (OLS) or with a L2 penalty
Assuming `n` dominates `p`,

* non-iterative (full solve): O(np²) - dominated by the construction of the
  Hessian X'X.
* iterative (conjugate gradient): O(κnp) - with κ the number of CG steps
  (κ ≤ p).
"""
function _fit(glr::GLR{L2Loss,<:L2R}, solver::Analytical, X, y)
    # full solve
    if !solver.iterative
        λ = getscale(glr.penalty)
        if iszero(λ)
            # standard LS solution
            return augment_X(X, glr.fit_intercept) \ y
        else
            # Ridge case -- form the Hat Matrix then solve
            H = form_XtX(X, glr.fit_intercept, λ)
            b = X'y
            glr.fit_intercept && (b = vcat(b, sum(y)))
            return cholesky!(H) \ b
        end
    end
    # Iterative case, note that there is no augmentation here
    # it is done implicitly in the application of the Hessian to
    # avoid copying X
    # The number of CG steps to convergence is at most `p`
    p = size(X, 2) + Int(glr.fit_intercept)
    max_cg_steps = min(solver.max_inner, p)
    # Form the Hessian map, cost of application H*v is O(np)
    Hm = LinearMap(Hv!(glr, X, y), p;
                   ismutating=true, isposdef=true, issymmetric=true)
    b = X'y
    glr.fit_intercept && (b = vcat(b, sum(y)))
    return cg(Hm, b; maxiter=max_cg_steps)
end
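For context, a minimal usage sketch of the two code paths above (not part of the commit; the constructor and keyword names `lambda`, `iterative` and `max_inner` are assumed to match the package API at this version):

using MLJLinearModels

X = randn(100, 5)
y = randn(100)
ridge = RidgeRegression(lambda=1.0)

# full solve: forms the p×p matrix X'X (O(np²)) and solves via Cholesky
θ_full = fit(ridge, X, y; solver=Analytical(iterative=false))

# iterative solve: conjugate gradient on the Hessian map, O(κnp) with κ ≤ p steps
θ_cg = fit(ridge, X, y; solver=Analytical(iterative=true))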

src/fit/default.jl

Lines changed: 16 additions & 16 deletions
Both hunks in this file are whitespace-only: the same lines are reindented with no change to their content. After the change they read:

@@ -10,13 +10,13 @@ export fit
_solver(::GLR{L2Loss,<:L2R}, np::NTuple{2,Int}) = Analytical()

# Logistic, Multinomial
_solver(::GLR{LogisticLoss,<:L2R},    np::NTuple{2,Int}) = LBFGS()
_solver(::GLR{MultinomialLoss,<:L2R}, np::NTuple{2,Int}) = LBFGS()

# Lasso, ElasticNet, Logistic, Multinomial
function _solver(glr::GLR{<:SmoothLoss,<:ENR}, np::NTuple{2,Int})
    (is_l1(glr.penalty) || is_elnet(glr.penalty)) && return FISTA()
    @error "Not yet implemented."
end

# Robust, Quantile

@@ -34,19 +34,19 @@ Fit a generalised linear regression model using an appropriate solver based on
the loss and penalty of the model. A method can, in some cases, be specified.
"""
function fit(glr::GLR, X::AbstractMatrix{<:Real}, y::AVR;
             solver::Solver=_solver(glr, size(X)))
    check_nrows(X, y)
    n, p = size(X)
    p += Int(glr.fit_intercept)
    # allocate cache for temporary computations of size n/p
    # which are frequent but otherwise un-important so that
    # we can reduce the overall number of allocations
    # these are const Refs defined when the module is loaded
    c = glr.loss isa MultinomialLoss ? maximum(y) : 0
    allocate(n, p, c)
    # effective call to fit routine
    θ = _fit(glr, solver, X, y)
    # de-allocate cache
    deallocate()
    return θ
end
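A sketch of the default solver selection performed by `_solver` (illustrative only; the ±1 label encoding for the logistic loss and the zero-argument constructors are assumptions about the package API):

using MLJLinearModels

X = randn(200, 3)
y = ifelse.(rand(200) .> 0.5, 1, -1)        # ±1 labels assumed for LogisticLoss

logreg = LogisticRegression()                # L2-penalised logistic regression
θ_default  = fit(logreg, X, y)               # no solver given: _solver picks LBFGS()
θ_explicit = fit(logreg, X, y; solver=LBFGS())

# L1 / elastic-net penalties dispatch to FISTA() by default
θ_lasso = fit(LassoRegression(), X, randn(200))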

src/glr/constructors.jl

Lines changed: 3 additions & 3 deletions
@@ -1,9 +1,9 @@
 export GeneralizedLinearRegression, GLR,
        LinearRegression, RidgeRegression,
        LassoRegression, ElasticNetRegression,
-       LADRegression, MADRegression,
-       LogisticRegression, MultinomialRegression,
-       RobustRegression, HuberRegression, QuantileRegression
+       LADRegression, LogisticRegression,
+       MultinomialRegression, RobustRegression,
+       HuberRegression, QuantileRegression
 
 """
     GeneralizedLinearRegression{L<:Loss, P<:Penalty}

src/mlj/interface.jl

Lines changed: 25 additions & 14 deletions
@@ -19,19 +19,20 @@ const ALL_MODELS = (REG_MODELS..., CLF_MODELS...)
 
 function MMI.fit(m::Union{REG_MODELS...}, verb::Int, X, y)
     Xmatrix = MMI.matrix(X)
+    features = (sch = MMI.schema(X)) === nothing ? nothing : sch.names
     reg = glr(m)
     solver = m.solver === nothing ? _solver(reg, size(Xmatrix)) : m.solver
     # get the parameters
     θ = fit(reg, Xmatrix, y; solver=solver)
     # return
-    return θ, nothing, NamedTuple{}()
+    return (θ, features), nothing, NamedTuple{}()
 end
 
-MMI.predict(m::Union{REG_MODELS...}, θ, Xnew) = apply_X(MMI.matrix(Xnew), θ)
+MMI.predict(m::Union{REG_MODELS...}, (θ, features), Xnew) = apply_X(MMI.matrix(Xnew), θ)
 
-function MMI.fitted_params(m::Union{REG_MODELS...}, θ)
-    m.fit_intercept && return (coefs = θ[1:end-1], intercept = θ[end])
-    return (coefs = θ, intercept = nothing)
+function MMI.fitted_params(m::Union{REG_MODELS...}, (θ, features))
+    m.fit_intercept && return (coefs = coef_vec(θ[1:end-1], features), intercept = θ[end])
+    return (coefs = coef_vec(θ, features), intercept = nothing)
 end
 
 #= ===========
@@ -40,6 +41,7 @@ end
 
 function MMI.fit(m::Union{CLF_MODELS...}, verb::Int, X, y)
     Xmatrix = MMI.matrix(X)
+    features = (sch = MMI.schema(X)) === nothing ? nothing : sch.names
     yplain = convert.(Int, MMI.int(y))
     classes = MMI.classes(y[1])
     nclasses = length(classes)
@@ -56,10 +58,10 @@ function MMI.fit(m::Union{CLF_MODELS...}, verb::Int, X, y)
     # get the parameters
     θ = fit(clf, Xmatrix, yplain, solver=solver)
     # return
-    return (θ, c, classes), nothing, NamedTuple{}()
+    return (θ, features, c, classes), nothing, NamedTuple{}()
 end
 
-function MMI.predict(m::Union{CLF_MODELS...}, (θ, c, classes), Xnew)
+function MMI.predict(m::Union{CLF_MODELS...}, (θ, features, c, classes), Xnew)
     Xmatrix = MMI.matrix(Xnew)
     preds = apply_X(Xmatrix, θ, c)
     # binary classification
@@ -73,20 +75,29 @@ function MMI.predict(m::Union{CLF_MODELS...}, (θ, c, classes), Xnew)
     return [MMI.UnivariateFinite(classes, preds[i, :]) for i in 1:size(Xmatrix,1)]
 end
 
-function MMI.fitted_params(m::Union{CLF_MODELS...}, (θ, c, classes))
+function MMI.fitted_params(m::Union{CLF_MODELS...}, (θ, features, c, classes))
+    function _fitted_params(coefs, features, intercept)
+        return (classes = classes, coefs = coef_vec(coefs, features), intercept = intercept)
+    end
     if c > 1
+        W = reshape(θ, :, c)
         if m.fit_intercept
-            W = reshape(θ, div(length(θ), c), c)
-            return (coefs = W, intercept = nothing)
+            return _fitted_params(W, features, W[end, :])
         end
-        W = reshape(θ, p+1, c)
-        return (coefs = W[1:p, :], intercept = W[end, :])
+        return _fitted_params(W[1:end-1, :], features, nothing)
     end
     # single class
-    m.fit_intercept && return (coefs = θ[1:end-1], intercept = θ[end])
-    return (coefs = θ, intercept = nothing)
+    m.fit_intercept && return _fitted_params(θ[1:end-1], features, θ[end])
+    return _fitted_params(θ, features, nothing)
 end
 
+@static VERSION < v"1.1" && (eachrow(A::AbstractVecOrMat) = (view(A, i, :) for i in axes(A, 1)))
+
+coef_vec(W::AbstractMatrix, features) = [feature => coef for (feature, coef) in zip(features, eachrow(W))]
+coef_vec(θ::AbstractVector, features) = [feature => coef for (feature, coef) in zip(features, θ)]
+coef_vec(W::AbstractMatrix, ::Nothing) = W
+coef_vec(θ::AbstractVector, ::Nothing) = θ
+
 #= =======================
    METADATA FOR ALL MODELS
    ======================= =#
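A sketch of what the new `features` / `coef_vec` plumbing gives at the MLJ level (illustrative only; `LinearRegressor` is assumed to be the MLJ wrapper exposed by this package, and the printed values are made up):

using MLJBase, MLJLinearModels

X = (x1 = randn(50), x2 = randn(50))
y = 2 .* X.x1 .- X.x2 .+ 0.1 .* randn(50)

mach = machine(LinearRegressor(), X, y)
fit!(mach)

fp = fitted_params(mach)
# fp.coefs is now a vector of `feature => coefficient` pairs, e.g.
#   [:x1 => 2.01, :x2 => -0.97]
# instead of a bare coefficient vector; fp.intercept is reported separately as before.
# When the input table has no schema (features === nothing), the raw vector or matrix
# is returned unchanged via the coef_vec(_, ::Nothing) fallbacks.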
