
Commit 3cc208b

Malloc (#25)

* decreasing allocs via temp
* using more legible refs
* reviews 1
* adding tests for scratchspace
* add scratch for robust
* tests ok
* adding a bunch of tests and completing the PR

1 parent adfb768 commit 3cc208b

16 files changed (+395, -116 lines)

README.md

Lines changed: 1 addition & 0 deletions

@@ -64,6 +64,7 @@ Systematic timing benchmarks have not been run yet but it's planned (see [this i
 * The models are built and tested assuming `n > p`; if this doesn't hold, tricks should be employed to speed up computations; these have not been implemented yet.
 * CV-aware code not implemented yet (code that re-uses computations when fitting over a number of hyper-parameters); "Meta" functionalities such as One-vs-All or Cross-Validation are left to other packages such as MLJ.
 * Stochastic solvers have not yet been implemented.
+* All computations are assumed to be done in Float64.

 ### Possible future models
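
Since everything is assumed to be Float64, data held in another precision would be converted up front; a minimal, hypothetical illustration:

```julia
# Hypothetical example: Float32 data converted before fitting, since the
# solvers assume Float64 throughout.
X32, y32 = rand(Float32, 100, 5), rand(Float32, 100)
X, y = Float64.(X32), Float64.(y32)
```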

src/MLJLinearModels.jl

Lines changed: 2 additions & 0 deletions

@@ -10,6 +10,8 @@ import Base.+, Base.-, Base.*, Base./, Base.convert

 const AVR = AbstractVector{<:Real}

+include("scratchspace.jl")
+
 include("utils.jl")

 # > Loss / penalty definitions <

src/fit/default.jl

Lines changed: 13 additions & 1 deletion

@@ -32,5 +32,17 @@ the loss and penalty of the model. A method can, in some cases, be specified.
 function fit(glr::GLR, X::AbstractMatrix{<:Real}, y::AVR;
              solver::Solver=_solver(glr, size(X)))
     check_nrows(X, y)
-    _fit(glr, solver, X, y)
+    n, p = size(X)
+    p += Int(glr.fit_intercept)
+    # allocate cache for temporary computations of size n/p
+    # which are frequent but otherwise un-important so that
+    # we can reduce the overall number of allocations
+    # these are const Refs defined when the module is loaded
+    c = glr.loss isa MultinomialLoss ? maximum(y) : 0
+    allocate(n, p, c)
+    # effective call to fit routine
+    θ = _fit(glr, solver, X, y)
+    # de-allocate cache
+    deallocate()
+    return θ
 end
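
The `allocate`/`deallocate` calls refer to buffers defined in the new `scratchspace.jl`, whose contents are not shown on this page. A minimal sketch of the pattern, assuming the buffers are module-level `const` `Ref`s as the comment says (the `c > 0` multinomial buffers are only indicated):

```julia
# Minimal sketch (assumed; scratchspace.jl itself is not shown in this diff):
# const Refs created at module load, re-sized once per fit, emptied after.
const SCRATCH_N = Ref(Vector{Float64}(undef, 0))   # length-n buffer
const SCRATCH_P = Ref(Vector{Float64}(undef, 0))   # length-p buffer

function allocate(n::Int, p::Int, c::Int)
    SCRATCH_N[] = Vector{Float64}(undef, n)
    SCRATCH_P[] = Vector{Float64}(undef, p)
    # c > 0 (multinomial case) would additionally size the n×c / p×c buffers
    return nothing
end

function deallocate()
    # drop the references so the buffers can be garbage collected
    SCRATCH_N[] = Float64[]
    SCRATCH_P[] = Float64[]
    return nothing
end
```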

src/glr/constructors.jl

Lines changed: 1 addition & 1 deletion

@@ -68,7 +68,7 @@ $SIGNATURES
 Objective function: ``|Xθ - y|₂²/2 + λ|θ|₂²/2 + γ|θ|₁``
 """
 function ElasticNetRegression(λ::Real=1.0, γ::Real=1.0; lambda::Real=λ, gamma::Real=γ,
-                             fit_intercept::Bool=true)
+                              fit_intercept::Bool=true)
     check_pos.((lambda, gamma))
     GLR(fit_intercept=fit_intercept, penalty=lambda*L2Penalty()+gamma*L1Penalty())
 end
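
For context, the positional arguments and the keyword aliases in the signature above set the same penalties; a hypothetical usage sketch (assuming the package is loaded):

```julia
# Hypothetical usage of the constructor shown above: positional λ, γ and the
# keyword aliases lambda, gamma are interchangeable.
enr  = ElasticNetRegression(0.5, 0.1)                # λ = 0.5, γ = 0.1
enr2 = ElasticNetRegression(lambda=0.5, gamma=0.1)   # same model
```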

src/glr/d_l2loss.jl

Lines changed: 18 additions & 7 deletions

@@ -24,16 +24,24 @@ function Hv!(glr::GLR{L2Loss,<:L2R}, X, y)
             a = 1:p
             Hvₐ = view(Hv, a)
             vₐ = view(v, a)
-            Xt1 = vec(sum(X, dims=1))
+            Xt1 = view(SCRATCH_P[], a)
+            copyto!(Xt1, sum(X, dims=1))    # -- X'1
             vₑ = v[end]
-            # update for the first p rows -- (X'X + λI)v[1:p] + (X'1)v[end]
-            mul!(Hvₐ, X', X * vₐ)
+            # update for the first p rows -- (X'X + λI)v[1:p] + (X'1)v[end]
+            Xvₐ = SCRATCH_N[]
+            mul!(Xvₐ, X, vₐ)
+            mul!(Hvₐ, X', Xvₐ)
             Hvₐ .+= λ .* vₐ .+ Xt1 .* vₑ
-            # update for the last row -- (X'1)'v + n v[end]
+            # update for the last row -- (X'1)'v + n v[end]
             Hv[end] = dot(Xt1, vₐ) + (n+λ) * vₑ
         end
     else
-        (Hv, v) -> (mul!(Hv, X', X * v); Hv .+= λ .* v)
+        (Hv, v) -> begin
+            Xv = SCRATCH_N[]
+            mul!(Xv, X, v)      # -- Xv
+            mul!(Hv, X', Xv)    # -- X'Xv
+            Hv .+= λ .* v       # -- X'Xv + λv
+        end
     end
 end
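
The "first p rows / last row" comments describe the block structure of the ridge Hessian once an intercept column is appended to X. A self-contained check of that structure (not package code; plain arrays stand in for the scratch buffers):

```julia
# Sketch: with Xb = [X 1], the ridge Hessian is Xb'Xb + λI =
#   [X'X + λI   X'1  ]
#   [ 1'X       n + λ],
# which is exactly what the two updates above compute row-block by row-block.
using LinearAlgebra

n, p, λ = 6, 3, 0.5
X = randn(n, p); v = randn(p + 1)
Xb = hcat(X, ones(n))                     # Xb = [X 1]

Xt1 = vec(sum(X, dims=1))                 # X'1
vₐ, vₑ = view(v, 1:p), v[end]
Hv = similar(v)
Hv[1:p] .= X' * (X * vₐ) .+ λ .* vₐ .+ Xt1 .* vₑ   # first p rows
Hv[end]  = dot(Xt1, vₐ) + (n + λ) * vₑ             # last row

@assert Hv ≈ (Xb' * Xb + λ * I) * v
```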

@@ -50,9 +58,12 @@ end

 function smooth_fg!(glr::GLR{L2Loss,<:ENR}, X, y)
     λ = getscale_l2(glr.penalty)
-    p = size(X, 2)
     (g, θ) -> begin
-        r = apply_X(X, θ) .- y
+        # cache contains the residuals (Xθ-y)
+        Xθ = SCRATCH_N[]
+        apply_X!(Xθ, X, θ)
+        r = SCRATCH_N[]
+        r .-= y              # -- r = Xθ-y
         apply_Xt!(g, X, r)
         g .+= λ .* θ
         return glr.loss(r) + get_l2(glr.penalty)(θ)
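
Note that `Xθ` and `r` are both read from `SCRATCH_N[]`, so they alias the same buffer: the in-place `r .-= y` turns the freshly computed `Xθ` into the residual with no extra allocation. A standalone illustration of the aliasing (a plain `Ref` stands in for `SCRATCH_N`):

```julia
# Two names, one scratch buffer: subtracting y in place turns the Xθ buffer
# into the residual without a second allocation.
buf = Ref(zeros(3))
Xθ = buf[]; Xθ .= [1.0, 2.0, 3.0]    # pretend this is X*θ
r = buf[]                            # same array, second name
r .-= [0.5, 0.5, 0.5]                # r = Xθ - y, in place
@assert Xθ === r && r == [0.5, 1.5, 2.5]
```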

src/glr/d_logistic.jl

Lines changed: 88 additions & 32 deletions

@@ -15,33 +15,51 @@ function fgh!(glr::GLR{LogisticLoss,<:L2R}, X, y)
     λ = getscale(glr.penalty)
     if glr.fit_intercept
         (f, g, H, θ) -> begin
-            Xθ = apply_X(X, θ)
+            Xθ = SCRATCH_N[]
+            apply_X!(Xθ, X, θ)          # -- Xθ = apply_X(X, θ)
             # precompute σ(yXθ) use -σ(-x) = (σ(x)-1)
-            w = σ.(Xθ .* y)
+            w = SCRATCH_N2[]
+            w .= σ.(Xθ .* y)            # -- w = σ.(Xθ .* y)
             g === nothing || begin
-                tmp = y .* (w .- 1.0)
-                apply_Xt!(g, X, tmp)
+                t = SCRATCH_N3[]
+                t .= y .* (w .- 1.0)    # -- t = y .* (w .- 1.0)
+                apply_Xt!(g, X, t)      # -- g = X't
                 g .+= λ .* θ
             end
             H === nothing || begin
-                ΛX = w .* X
-                mul!(view(H, 1:p, 1:p), X', ΛX)
-                ΛXt1 = sum(ΛX, dims=1)
+                # NOTE: we could try to be clever to reduce the allocations for
+                # ΛX but computing the full hessian allocates a lot anyway so
+                # probably not really worth it
+                ΛX = w .* X                        # !! big allocs
+                mul!(view(H, 1:p, 1:p), X', ΛX)    # -- H[1:p,1:p] = X'ΛX
+                ΛXt1 = view(SCRATCH_P[], 1:p)
+                copyto!(ΛXt1, sum(ΛX, dims=1))     # -- (ΛX)'1
                 @inbounds for i = 1:p
-                    H[i, end] = H[end, i] = ΛXt1[i]
+                    H[i, end] = H[end, i] = ΛXt1[i]  # -- H[:,p+1] = H[p+1,:] = (ΛX)'1
                 end
-                H[end, end] = sum(w)
-                add_λI!(H, λ)
+                H[end, end] = sum(w)               # -- 1'Λ1
+                add_λI!(H, λ)                      # -- H = X'ΛX + λI
             end
             f === nothing || return J(y, Xθ, θ)
         end
     else
+        # see comments above, same computations just no additional things for
+        # fit_intercept
        (f, g, H, θ) -> begin
-            Xθ = apply_X(X, θ)
-            # precompute σ(yXθ) use -σ(-x) = σ(x)(σ(x)-1)
-            w = σ.(y .* Xθ)
-            g === nothing || (mul!(g, X', y .* (w .- 1.0)); g .+= λ .* θ)
-            H === nothing || (mul!(H, X', w .* X); add_λI!(H, λ))
+            Xθ = SCRATCH_N[]
+            apply_X!(Xθ, X, θ)
+            w = SCRATCH_N2[]
+            w .= σ.(y .* Xθ)
+            g === nothing || begin
+                t = SCRATCH_N3[]
+                t .= y .* (w .- 1.0)
+                apply_Xt!(g, X, t)
+                g .+= λ .* θ
+            end
+            H === nothing || begin
+                mul!(H, X', w .* X)
+                add_λI!(H, λ)
+            end
            f === nothing || return J(y, Xθ, θ)
        end
    end
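
The "precompute" comment relies on the identity σ(-x) = 1 - σ(x), hence -σ(-x) = σ(x) - 1, which is exactly the derivative of the per-sample logistic loss with respect to its margin; a quick standalone check:

```julia
# ℓ is the per-sample logistic loss as a function of the margin m = y * (xᵀθ);
# its derivative is -σ(-m) = σ(m) - 1, verified against a finite difference.
σ(x) = 1 / (1 + exp(-x))
ℓ(m) = log1p(exp(-m))

m, h = 0.3, 1e-6
@assert -σ(-m) ≈ σ(m) - 1
@assert isapprox((ℓ(m + h) - ℓ(m - h)) / (2h), σ(m) - 1; atol=1e-6)
```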
@@ -55,24 +73,36 @@ function Hv!(glr::GLR{LogisticLoss,<:L2R}, X, y)
         # rows a 1:p = [X'ΛX + λI | X'Λ1]
         # row e end = [1'ΛX | sum(a)+λ]
         (Hv, θ, v) -> begin
-            # precompute σ(yXθ) use -σ(-x) = (σ(x)-1)
-            w = σ.(apply_X(X, θ) .* y)
+            Xθ = SCRATCH_N[]
+            apply_X!(Xθ, X, θ)           # -- Xθ = apply_X(X, θ)
+            w = SCRATCH_N2[]
+            w .= σ.(Xθ .* y)             # -- w = σ.(Xθ .* y)
             # view on the first p rows
             a = 1:p
             Hvₐ = view(Hv, a)
             vₐ = view(v, a)
-            XtΛ1 = X' * w # X'Λ1; O(np)
+            XtΛ1 = view(SCRATCH_P[], 1:p)
+            mul!(XtΛ1, X', w)            # -- X'Λ1; O(np)
             vₑ = v[end]
             # update for the first p rows -- (X'X + λI)v[1:p] + (X'1)v[end]
-            mul!(Hvₐ, X', w .* (X * vₐ)) # (X'ΛX)vₐ
+            Xvₐ = SCRATCH_N[]
+            mul!(Xvₐ, X, vₐ)
+            Xvₐ .*= w                    # -- ΛXvₐ
+            mul!(Hvₐ, X', Xvₐ)           # -- (X'ΛX)vₐ
             Hvₐ .+= λ .* vₐ .+ XtΛ1 .* vₑ
             # update for the last row -- (X'1)'v + n v[end]
             Hv[end] = dot(XtΛ1, vₐ) + (sum(w)+λ) * vₑ
         end
     else
         (Hv, θ, v) -> begin
-            w = σ.(apply_X(X, θ) .* y)
-            mul!(Hv, X', w .* (X * v))
+            Xθ = SCRATCH_N[]
+            apply_X!(Xθ, X, θ)
+            w = SCRATCH_N2[]
+            w .= σ.(Xθ .* y)             # -- σ(yXθ)
+            Xv = SCRATCH_N3[]
+            mul!(Xv, X, v)
+            Xv .*= SCRATCH_N2[]          # -- ΛXv
+            mul!(Hv, X', Xv)             # -- X'ΛXv
             Hv .+= λ .* v
         end
     end
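
The three in-place steps compute (X'ΛX)v as a sequence of matrix-vector products, never materialising X'ΛX itself. A self-contained check of that ordering (plain arrays in place of the `SCRATCH_*` buffers):

```julia
using LinearAlgebra

n, p, λ = 5, 3, 0.1
X = randn(n, p); v = randn(p)
w = rand(n)               # the diagonal of Λ, i.e. σ.(Xθ .* y) above

Xv = similar(v, n)        # stand-in for SCRATCH_N3[]
Hv = similar(v)
mul!(Xv, X, v)            # Xv
Xv .*= w                  # ΛXv
mul!(Hv, X', Xv)          # X'ΛXv
Hv .+= λ .* v             # + λv

@assert Hv ≈ (X' * Diagonal(w) * X + λ * I) * v
```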
@@ -113,24 +143,50 @@ function fg!(glr::GLR{MultinomialLoss,<:L2R}, X, y)
     c = length(unique(y))
     λ = getscale(glr.penalty)
     (f, g, θ) -> begin
-        P = apply_X(X, θ, c)              # O(npc) store n * c
-        M = exp.(P)                       # O(npc) store n * c
+        P = SCRATCH_NC[]
+        apply_X!(P, X, θ, c)              # O(npc) store n * c
+        M = SCRATCH_NC2[]
+        M .= exp.(P)                      # O(npc) store n * c
         g === nothing || begin
-            ΛM = M ./ sum(M, dims=2)      # O(nc) store n * c
-            Q = BitArray(y[i] == j for i = 1:n, j = 1:c)
-            G = X'ΛM .- X'Q               # O(npc) store n * c
+            ΛM = SCRATCH_NC3[]
+            ΛM .= M ./ sum(M, dims=2)     # O(nc) store n * c
+            Q = SCRATCH_NC4[]
+            @inbounds for i = 1:n, j = 1:c
+                Q[i, j] = ifelse(y[i] == j, 1.0, 0.0)
+            end
+            ∑ΛM = sum(ΛM, dims=1)
+            ∑Q = sum(Q, dims=1)
+            R = ΛM
+            R .-= Q
+            G = SCRATCH_PC[]
             if glr.fit_intercept
-                G = vcat(G, sum(ΛM, dims=1) .- sum(Q, dims=1))
+                mul!(view(G, 1:p, :), X', R)
+                @inbounds for k in 1:c
+                    G[end, k] = ∑ΛM[k] - ∑Q[k]
+                end
+            else
+                mul!(G, X', R)
             end
-            g .= reshape(G, (p + Int(glr.fit_intercept)) * c)
+            g .= reshape(G, (p+Int(glr.fit_intercept))*c)
             g .+= λ .* θ
         end
         f === nothing || begin
             # we re-use pre-computations here, see also MultinomialLoss
-            ms = maximum(P, dims=2)
-            ss = sum(M ./ exp.(ms), dims=2)
-            @inbounds ps = [P[i, y[i]] for i in eachindex(y)]
-            return sum(log.(ss) .+ ms .- ps) + glr.penalty(θ)
+            ms = maximum(P, dims=2)
+            ems = SCRATCH_N[]
+            @inbounds for i in 1:n
+                ems[i] = exp(ms[i])
+            end
+            ΛM = SCRATCH_NC2[]            # note that _NC is already linked to P
+            ΛM .= M ./ ems
+            ss = sum(ΛM, dims=2)
+            t = 0.0
+            @inbounds for i in eachindex(y)
+                t += log(ss[i]) + ms[i] - P[i, y[i]]
+            end
+            return t + glr.penalty(θ)
         end
     end
 end
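
The `f` branch computes Σᵢ (log Σⱼ exp(Pᵢⱼ - mᵢ) + mᵢ - Pᵢ,yᵢ), i.e. it applies the max-shifted log-sum-exp identity, where subtracting the row maximum mᵢ keeps `exp` from overflowing. A standalone sketch of the trick:

```julia
# Max-shifted log-sum-exp over the rows of a score matrix P; the naive
# log.(sum(exp.(P), dims=2)) overflows to Inf on the first row.
P  = [1000.0 1001.0; -5.0 3.0]
ms = maximum(P, dims=2)                        # row maxima m
lse = log.(sum(exp.(P .- ms), dims=2)) .+ ms   # finite: [1001.31…; 3.000…]
@assert all(isfinite, lse)
```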
