Commit e2e3076

Noscratch2 (#70)
* updating bm
* hoisting allocations outside of factories, closes #68
* closes #69 and patch release
1 parent b6cc040 commit e2e3076

15 files changed: +197 / -192 lines
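
The central change is the second bullet of the commit message: factory functions such as `Hv!` and `smooth_fg!` used to allocate their own scratch vectors every time they were called, and those allocations are now hoisted into a single named tuple built once in `fit` and threaded through every `_fit` method. A minimal sketch of that pattern follows, with illustrative names (`make_hv_before`, `make_buffers`, `make_hv_after`) that are not part of the package's API:

using LinearAlgebra

# Before: each call to the factory allocates a fresh buffer.
function make_hv_before(X)
    buf = zeros(size(X, 1))                     # allocated on every factory call
    (Hv, v) -> (mul!(buf, X, v); mul!(Hv, X', buf))
end

# After: the caller allocates the buffers once and passes them in.
make_buffers(nobs) = (n = zeros(nobs),)

function make_hv_after(X, scratch)
    (Hv, v) -> (mul!(scratch.n, X, v); mul!(Hv, X', scratch.n))
end

X   = randn(100, 5)
s   = make_buffers(size(X, 1))
hv! = make_hv_after(X, s)
hv!(zeros(5), randn(5))                         # reuses s.n on every call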

Project.toml

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 name = "MLJLinearModels"
 uuid = "6ee0df7b-362f-4a72-a706-9e79364fb692"
 authors = ["Thibaut Lienart <[email protected]>"]
-version = "0.3.5"
+version = "0.3.6"
 
 [deps]
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"

src/fit/analytical.jl

Lines changed: 2 additions & 2 deletions
@@ -15,7 +15,7 @@ Assuming `n` dominates `p`,
 * iterative (conjugate gradient): O(κnp) - with κ the number of CG steps
   (κ ≤ p).
 """
-function _fit(glr::GLR{L2Loss,<:L2R}, solver::Analytical, X, y)
+function _fit(glr::GLR{L2Loss,<:L2R}, solver::Analytical, X, y, scratch)
     # full solve
     if !solver.iterative
         λ = getscale(glr.penalty)
@@ -37,7 +37,7 @@ function _fit(glr::GLR{L2Loss,<:L2R}, solver::Analytical, X, y)
     p = size(X, 2) + Int(glr.fit_intercept)
     max_cg_steps = min(solver.max_inner, p)
     # Form the Hessian map, cost of application H*v is O(np)
-    Hm = LinearMap(Hv!(glr, X, y), p;
+    Hm = LinearMap(Hv!(glr, X, y, scratch), p;
                    ismutating=true, isposdef=true, issymmetric=true)
     b = X'y
     glr.fit_intercept && (b = vcat(b, sum(y)))
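
For context, `Hm` above is a function-backed linear operator whose application costs O(np), and the iterative branch solves the regularized normal equations with conjugate gradient. The sketch below is not the package's code: it assumes LinearMaps.jl (which the diff does use) and, as a stand-in CG solver, IterativeSolvers.jl, purely to show how a mutating Hessian-vector product can be consumed this way.

using LinearAlgebra, LinearMaps, IterativeSolvers

n, p, λ = 200, 10, 1.0
X, y = randn(n, p), randn(n)

# mutating Hessian-vector product: Hv ← (X'X + λI) v, O(np) per application
hv!(Hv, v) = (Hv .= X' * (X * v) .+ λ .* v)

Hm = LinearMap(hv!, p; ismutating=true, issymmetric=true, isposdef=true)
θ  = cg(Hm, X'y; maxiter=min(20, p))    # solves (X'X + λI)θ = X'y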

src/fit/default.jl

Lines changed: 14 additions & 1 deletion
@@ -36,5 +36,18 @@ the loss and penalty of the model. A method can, in some cases, be specified.
 function fit(glr::GLR, X::AbstractMatrix{<:Real}, y::AVR;
              solver::Solver=_solver(glr, size(X)))
     check_nrows(X, y)
-    return _fit(glr, solver, X, y)
+    n, p = size(X)
+    c = glr.loss isa MultinomialLoss ? maximum(y) : 0
+    return _fit(glr, solver, X, y, scratch(n, p, c, i=glr.fit_intercept))
 end
+
+function scratch(n, p, c=0; i=false)
+    p_ = p + Int(i)
+    s = (n=zeros(n), n2=zeros(n), n3=zeros(n), p=zeros(p_))
+    if !iszero(c)
+        s = (s..., nc=zeros(n,c), nc2=zeros(n,c), nc3=zeros(n,c),
+             nc4=zeros(n,c), pc=zeros(p_,c))
+    end
+    return s
+end
+
+scratch(X; kw...) = scratch(size(X)...; kw...)
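
To make the shape of the new cache concrete, here is an illustrative call to the `scratch` helper defined above (sizes are arbitrary). The single-class buffers are what the closures in the `d_*.jl` files index as `scratch.n`, `scratch.p`, and so on; the class-indexed buffers are only materialised for multinomial losses.

s = scratch(100, 5, 3, i=true)   # n=100 observations, p=5 features, c=3 classes, intercept
length(s.n)                      # 100      -- n-length buffers: s.n, s.n2, s.n3
length(s.p)                      # 6        -- p + 1, since i=true accounts for the intercept
size(s.nc)                       # (100, 3) -- nc..nc4 and pc only exist when c ≠ 0

X  = randn(100, 5)
s2 = scratch(X)                  # convenience form: sizes taken from a data matrix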

src/fit/iwls.jl

Lines changed: 3 additions & 2 deletions
@@ -1,8 +1,9 @@
-function _fit(glr::GLR{RobustLoss{ρ},<:L2R}, solver::IWLSCG, X, y) where {ρ}
+function _fit(glr::GLR{RobustLoss{ρ},<:L2R}, solver::IWLSCG, X, y, scratch
+              ) where {ρ}
     λ = getscale(glr.penalty)
     n = size(X, 1)
     p = size(X, 2) + Int(glr.fit_intercept)
-    _Mv! = Mv!(glr, X, y; threshold=solver.threshold)
+    _Mv! = Mv!(glr, X, y, scratch; threshold=solver.threshold)
     κ = solver.damping # between 0 and 1, 1 = fully take the new iteration
     # cache
     θ = zeros(p)

src/fit/newton.jl

Lines changed: 12 additions & 12 deletions
@@ -13,10 +13,10 @@ Assuming `n` dominates `p`, O(κnp²), dominated by the construction of the
 Hessian at each step with κ the number of Newton steps.
 """
 function _fit(glr::GLR{<:Union{LogisticLoss,RobustLoss},<:L2R},
-              solver::Newton, X, y)
+              solver::Newton, X, y, scratch)
     p = size(X, 2) + Int(glr.fit_intercept)
     θ₀ = zeros(p)
-    _fgh! = fgh!(glr, X, y)
+    _fgh! = fgh!(glr, X, y, scratch)
     opt = Optim.only_fgh!(_fgh!)
     res = Optim.optimize(opt, θ₀, Optim.Newton())
     return Optim.minimizer(res)
@@ -35,12 +35,12 @@ Hessian at each step where κ₁ is the number of Newton steps and κ₂ is the
 average number of CG steps per Newton step (which is at most p).
 """
 function _fit(glr::GLR{<:Union{LogisticLoss,RobustLoss},<:L2R},
-              solver::NewtonCG, X, y)
+              solver::NewtonCG, X, y, scratch)
     p = size(X, 2) + Int(glr.fit_intercept)
     θ₀ = zeros(p)
     _f = objective(glr, X, y)
-    _fg! = (g, θ) -> fgh!(glr, X, y)(0.0, g, nothing, θ) # Optim.jl/issues/738
-    _Hv! = Hv!(glr, X, y)
+    _fg! = (g, θ) -> fgh!(glr, X, y, scratch)(0.0, g, nothing, θ) # Optim.jl/738
+    _Hv! = Hv!(glr, X, y, scratch)
     opt = Optim.TwiceDifferentiableHV(_f, _fg!, _Hv!, θ₀)
     res = Optim.optimize(opt, θ₀, Optim.KrylovTrustRegion())
     return Optim.minimizer(res)
@@ -57,10 +57,10 @@ Assuming `n` dominates `p`, O(κnp), dominated by the computation of the
 gradient at each step with κ the number of LBFGS steps.
 """
 function _fit(glr::GLR{<:Union{LogisticLoss,RobustLoss},<:L2R},
-              solver::LBFGS, X, y)
+              solver::LBFGS, X, y, scratch)
     p = size(X, 2) + Int(glr.fit_intercept)
     θ₀ = zeros(p)
-    _fg! = (f, g, θ) -> fgh!(glr, X, y)(f, g, nothing, θ)
+    _fg! = (f, g, θ) -> fgh!(glr, X, y, scratch)(f, g, nothing, θ)
     opt = Optim.only_fg!(_fg!)
     res = Optim.optimize(opt, θ₀, Optim.LBFGS())
     return Optim.minimizer(res)
@@ -82,13 +82,13 @@ computations are dominated by the application of the Hessian at each step with
 κ₁ the number of Newton steps and κ₂ the average number of CG steps per Newton
 step.
 """
-function _fit(glr::GLR{MultinomialLoss,<:L2R}, solver::NewtonCG, X, y)
+function _fit(glr::GLR{MultinomialLoss,<:L2R}, solver::NewtonCG, X, y, scratch)
     p = size(X, 2) + Int(glr.fit_intercept)
     c = maximum(y)
     θ₀ = zeros(p * c)
     _f = objective(glr, X, y; c=c)
-    _fg! = (g, θ) -> fg!(glr, X, y)(0.0, g, θ) # XXX: Optim.jl/issues/738
-    _Hv! = Hv!(glr, X, y)
+    _fg! = (g, θ) -> fg!(glr, X, y, scratch)(0.0, g, θ) # XXX: Optim.jl/738
+    _Hv! = Hv!(glr, X, y, scratch)
     opt = Optim.TwiceDifferentiableHV(_f, _fg!, _Hv!, θ₀)
     res = Optim.optimize(opt, θ₀, Optim.KrylovTrustRegion())
     return Optim.minimizer(res)
@@ -105,11 +105,11 @@ Assuming `n` dominates `p`, O(κnpc), with `c` the number of classes, dominated
 by the computation of the gradient at each step with κ the number of LBFGS
 steps.
 """
-function _fit(glr::GLR{MultinomialLoss,<:L2R}, solver::LBFGS, X, y)
+function _fit(glr::GLR{MultinomialLoss,<:L2R}, solver::LBFGS, X, y, scratch)
    p = size(X, 2) + Int(glr.fit_intercept)
     c = maximum(y)
     θ₀ = zeros(p * c)
-    _fg! = fg!(glr, X, y)
+    _fg! = fg!(glr, X, y, scratch)
     opt = Optim.only_fg!(_fg!)
     res = Optim.optimize(opt, θ₀, Optim.LBFGS())
     return Optim.minimizer(res)

src/fit/proxgrad.jl

Lines changed: 2 additions & 2 deletions
@@ -2,7 +2,7 @@
 
 # Assumption: loss has gradient; penalty has prox e.g.: Lasso
 # J(θ) = f(θ) + r(θ) where f is smooth
-function _fit(glr::GLR, solver::ProxGrad, X, y)
+function _fit(glr::GLR, solver::ProxGrad, X, y, scratch)
     c = ifelse(isa(glr.loss, MultinomialLoss), length(unique(y)), 1)
     p = (size(X, 2) + Int(glr.fit_intercept)) * c
     # vector caches + eval cache
@@ -20,7 +20,7 @@ function _fit(glr::GLR, solver::ProxGrad, X, y)
     acc = ifelse(solver.accel, 1.0, 0.0) # if 0, no extrapolation (ISTA)
     # functions
     _f = smooth_objective(glr, X, y; c=c)
-    _fg! = smooth_fg!(glr, X, y)
+    _fg! = smooth_fg!(glr, X, y, scratch)
     _prox! = prox!(glr)
     bt_cond = θ̂ ->
         _f(θ̂) > fθ̄ + dot(θ̂ .- θ̄, ∇fθ̄) + sum(abs2.(θ̂ .- θ̄)) / (2η)

src/glr/d_l2loss.jl

Lines changed: 13 additions & 15 deletions
@@ -12,27 +12,27 @@
 # * Hv! used in iterative solution
 # ---------------------------------------------------------
 
-function Hv!(glr::GLR{L2Loss,<:L2R}, X, y)
+function Hv!(glr::GLR{L2Loss,<:L2R}, X, y, scratch)
     n, p = size(X)
     λ = getscale(glr.penalty)
-    # scratch allocation
-    SCRATCH_N = zeros(n)
     if glr.fit_intercept
-        SCRATCH_P = zeros(p)
         # H = [X 1]'[X 1] + λ I
         # rows a 1:p = [X'X + λI | X'1]
        # row e end = [1'X | n+λι] where ι is 1 if glr.penalize_intercept
         ι = float(glr.penalize_intercept)
         (Hv, v) -> begin
             # view on the first p rows
-            a = 1:p
-            Hvₐ = view(Hv, a)
-            vₐ = view(v, a)
-            Xt1 = SCRATCH_P
-            copyto!(Xt1, sum(X, dims=1)) # -- X'1 (note: sum will allocate)
+            a = 1:p
+            Hvₐ = view(Hv, a)
+            vₐ = view(v, a)
+            Xt1 = view(scratch.p, a)
+            Xt1 .*= 0
+            @inbounds for i in a, j in 1:n
+                Xt1[i] += X[j, i] # -- X'1
+            end
             vₑ = v[end]
             # update for the first p rows -- (X'X + λI)v[1:p] + (X'1)v[end]
-            Xvₐ = SCRATCH_N
+            Xvₐ = scratch.n
             mul!(Xvₐ, X, vₐ)
             mul!(Hvₐ, X', Xvₐ)
             Hvₐ .+= λ .* vₐ .+ Xt1 .* vₑ
@@ -41,7 +41,7 @@ function Hv!(glr::GLR{L2Loss,<:L2R}, X, y)
         end
     else
         (Hv, v) -> begin
-            Xv = SCRATCH_N
+            Xv = scratch.n
             mul!(Xv, X, v) # -- Xv
             mul!(Hv, X', Xv) # -- X'Xv
             Hv .+= λ .* v # -- X'Xv + λv
@@ -60,13 +60,11 @@ end
 # -> prox_r = soft-thresh
 # ---------------------------------------------------------
 
-function smooth_fg!(glr::GLR{L2Loss,<:ENR}, X, y)
+function smooth_fg!(glr::GLR{L2Loss,<:ENR}, X, y, scratch)
     λ = getscale_l2(glr.penalty)
-    # scratch allocation
-    SCRATCH_N = zeros(size(X, 1))
     (g, θ) -> begin
         # cache contains the residuals (Xθ-y)
-        r = SCRATCH_N
+        r = scratch.n
         get_residuals!(r, X, θ, y) # -- r = Xθ-y
         apply_Xt!(g, X, r)
         g .+= λ .* θ
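
The intercept branch of `Hv!` above applies H = [X 1]'[X 1] + λI blockwise rather than forming it. As a small sanity sketch (not part of this commit), the block formula described by the comments can be checked against the explicitly assembled matrix on a random problem, with the intercept penalized (ι = 1):

using LinearAlgebra

n, p, λ = 50, 4, 0.1
X  = randn(n, p)
X1 = hcat(X, ones(n))              # [X 1]
H  = X1' * X1 + λ * I              # (p+1)×(p+1), intercept penalized (ι = 1)
v  = randn(p + 1)

Xt1 = vec(sum(X, dims=1))          # X'1
Hv  = similar(v)
Hv[1:p] = X' * (X * v[1:p]) .+ λ .* v[1:p] .+ Xt1 .* v[end]   # rows 1:p -- [X'X + λI | X'1]
Hv[end] = dot(Xt1, v[1:p]) + (n + λ) * v[end]                 # last row -- [1'X | n + λι]
@assert Hv ≈ H * v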
