Skip to content

Commit 6316160

Browse files
authored
Benchmarking etc (#29)
* decreasing allocs via temp * using morel legible refs * reviews 1 * adding tests for scratchspace * add scratch for robust * tests ok * adding a bunch of tests and completing the PR * additional fixes * update bench * fixing all conflicts...
1 parent 3cc208b commit 6316160

File tree

7 files changed

+238
-62
lines changed

7 files changed

+238
-62
lines changed

src/glr/d_l2loss.jl

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ function Hv!(glr::GLR{L2Loss,<:L2R}, X, y)
2525
Hvₐ = view(Hv, a)
2626
vₐ = view(v, a)
2727
Xt1 = view(SCRATCH_P[], a)
28-
copyto!(Xt1, sum(X, dims=1)) # -- X'1
28+
copyto!(Xt1, sum(X, dims=1)) # -- X'1 (note: sum will allocate)
2929
vₑ = v[end]
3030
# update for the first p rows -- (X'X + λI)v[1:p] + (X'1)v[end]
3131
Xvₐ = SCRATCH_N[]
@@ -60,10 +60,8 @@ function smooth_fg!(glr::GLR{L2Loss,<:ENR}, X, y)
6060
λ = getscale_l2(glr.penalty)
6161
(g, θ) -> begin
6262
# cache contains the residuals (Xθ-y)
63-
= SCRATCH_N[]
64-
apply_X!(Xθ, X, θ)
65-
r = SCRATCH_N[]
66-
r .-= y # -- r = Xθ-y
63+
r = SCRATCH_N[]
64+
get_residuals!(r, X, θ, y) # -- r = Xθ-y
6765
apply_Xt!(g, X, r)
6866
g .+= λ .* θ
6967
return glr.loss(r) + get_l2(glr.penalty)(θ)

src/glr/d_robust.jl

Lines changed: 51 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -38,17 +38,20 @@ function fgh!(glr::GLR{RobustLoss{ρ},<:L2R}, X, y) where ρ <: RobustRho1P{δ}
3838
ϕ_ = ϕ(ρ)
3939
if glr.fit_intercept
4040
(f, g, H, θ) -> begin
41-
r = _get_residuals(X, θ, y)
42-
w = _get_w(r, δ)
41+
r = SCRATCH_N[]
42+
get_residuals!(r, X, θ, y)
43+
w = SCRATCH_N2[]
44+
w .= convert.(Float64, abs.(r) .<= δ)
4345
# gradient via ψ function
4446
g === nothing || begin
45-
ψr = _get_ψr(r, w, ψ_)
47+
ψr = SCRATCH_N3[]
48+
ψr .= ψ_.(r, w)
4649
apply_Xt!(g, X, ψr)
4750
g .+= λ .* θ
4851
end
4952
# Hessian via ϕ functiono
5053
H === nothing || begin
51-
# Hessian allocates a ton anyway so use of scratch is a bit pointless
54+
# NOTE: Hessian allocates a ton anyway so use of scratch is a bit pointless
5255
ϕr = ϕ_.(r, w)
5356
ΛX = ϕr .* X
5457
mul!(view(H, 1:p, 1:p), X', ΛX)
@@ -64,11 +67,14 @@ function fgh!(glr::GLR{RobustLoss{ρ},<:L2R}, X, y) where ρ <: RobustRho1P{δ}
6467
end
6568
else
6669
(f, g, H, θ) -> begin
67-
r = _get_residuals(X, θ, y)
68-
w = _get_w(r, δ)
70+
r = SCRATCH_N[]
71+
get_residuals!(r, X, θ, y)
72+
w = SCRATCH_N2[]
73+
w .= convert.(Float64, abs.(r) .<= δ)
6974
# gradient via ψ function
7075
g === nothing || begin
71-
ψr = _get_ψr(r, w, ψ_)
76+
ψr = SCRATCH_N3[]
77+
ψr .= ψ_.(r, w)
7278
apply_Xt!(g, X, ψr)
7379
g .+= λ .* θ
7480
end
@@ -87,8 +93,10 @@ function Hv!(glr::GLR{RobustLoss{ρ},<:L2R}, X, y) where ρ <: RobustRho1P{δ} w
8793
# see d_logistic.jl for more comments on this (similar procedure)
8894
if glr.fit_intercept
8995
(Hv, θ, v) -> begin
90-
r = _get_residuals(X, θ, y)
91-
w = _get_w(r, δ)
96+
r = SCRATCH_N[]
97+
get_residuals!(r, X, θ, y)
98+
w = SCRATCH_N2[]
99+
w .= convert.(Float64, abs.(r) .<= δ)
92100
w .= ϕ_.(r, w)
93101
# views on first p rows (intercept row treated after)
94102
a = 1:p
@@ -108,8 +116,10 @@ function Hv!(glr::GLR{RobustLoss{ρ},<:L2R}, X, y) where ρ <: RobustRho1P{δ} w
108116
end
109117
else
110118
(Hv, θ, v) -> begin
111-
r = _get_residuals(X, θ, y)
112-
w = _get_w(r, δ)
119+
r = SCRATCH_N[]
120+
get_residuals!(r, X, θ, y)
121+
w = SCRATCH_N2[]
122+
w .= convert.(Float64, abs.(r) .<= δ)
113123
w .= ϕ_.(r, w)
114124
t = SCRATCH_N3[]
115125
apply_X!(t, X, v)
@@ -120,6 +130,7 @@ function Hv!(glr::GLR{RobustLoss{ρ},<:L2R}, X, y) where ρ <: RobustRho1P{δ} w
120130
end
121131
end
122132

133+
123134
# For IWLS
124135
function Mv!(glr::GLR{RobustLoss{ρ},<:L2R}, X, y;
125136
threshold=1e-6) where ρ <: RobustRho1P{δ} where δ
@@ -130,24 +141,38 @@ function Mv!(glr::GLR{RobustLoss{ρ},<:L2R}, X, y;
130141
# which we solve via an iterative method so, one θ
131142
# gives one way of applying the relevant matrix (X'ΛX+λI)
132143
(ωr, θ) -> begin
133-
r = _get_residuals(X, θ, y)
134-
w = _get_w(r, δ)
144+
r = SCRATCH_N[]
145+
get_residuals!(r, X, θ, y)
146+
w = SCRATCH_N2[]
147+
w .= convert.(Float64, abs.(r) .<= δ)
135148
# ω = ψ(r)/r ; weighing factor for IWLS
136149
ωr .= ω_.(r, w)
137150
# function defining the application of (X'ΛX + λI)
138151
if glr.fit_intercept
139152
(Mv, v) -> begin
140-
a = 1:p
141-
vₐ = view(v, a)
142-
Mvₐ = view(Mv, a)
143-
XtW1 = vec(sum(ωr .* X, dims=1))
144-
vₑ = v[end]
145-
mul!(Mvₐ, X', ωr .* (X * vₐ))
153+
a = 1:p
154+
vₐ = view(v, a)
155+
Mvₐ = view(Mv, a)
156+
XtW1 = view(SCRATCH_P[], a)
157+
@inbounds for j in a
158+
XtW1[j] = dot(ωr, view(X, :, j))
159+
end
160+
vₑ = v[end]
161+
t = SCRATCH_N[]
162+
apply_X!(t, X, vₐ)
163+
t .*= ωr
164+
mul!(Mvₐ, X', t)
146165
Mvₐ .+= λ .* vₐ .+ XtW1 .* vₑ
147166
Mv[end] = dot(XtW1, vₐ) + (sum(ωr)+λ) * vₑ
148167
end
149168
else
150-
(Mv, v) -> (mul!(Mv, X', ωr .* (X * v)); Mv .+= λ .* v)
169+
(Mv, v) -> begin
170+
t = SCRATCH_N[]
171+
apply_X!(t, X, v)
172+
t .*= ωr
173+
mul!(Mv, X', t)
174+
Mv .+= λ .* v
175+
end
151176
end
152177
end
153178
end
@@ -159,9 +184,12 @@ function smooth_fg!(glr::GLR{RobustLoss{ρ},<:ENR}, X, y) where ρ <: RobustRho1
159184
p = size(X, 2)
160185
ψ_ = ψ(ρ)
161186
(g, θ) -> begin
162-
r = _get_residuals(X, θ, y)
163-
w = _get_w(r, δ)
164-
ψr = _get_ψr(r, w, ψ_)
187+
r = SCRATCH_N[]
188+
get_residuals!(r, X, θ, y)
189+
w = SCRATCH_N2[]
190+
w .= convert.(Float64, abs.(r) .<= δ)
191+
ψr = SCRATCH_N3[]
192+
ψr .= ψ_.(r, w)
165193
apply_Xt!(g, X, ψr)
166194
g .+= λ .* θ
167195
return glr.loss(r) + get_l2(glr.penalty)(θ)

src/glr/utils.jl

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,14 @@ $SIGNATURES
4040
Return a model corresponding to the smooth part of the objective.
4141
"""
4242
get_smooth(glr::GLR) = (o = smooth_objective(glr); GLR(o.loss, o.penalty, glr.fit_intercept))
43+
44+
45+
"""
46+
$SIGNATURES
47+
48+
Helper function to compute the residuals.
49+
"""
50+
function get_residuals!(r, X, θ, y)
51+
apply_X!(r, X, θ)
52+
r .-= y
53+
end

src/utils.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ function apply_X!(Xθ, X, θ, c=1)
5757
if length(θ) == p
5858
mul!(Xθ, X, θ)
5959
else
60-
mul!(Xθ, X, θ[1:p])
60+
mul!(Xθ, X, view(θ, 1:p))
6161
.+= θ[end]
6262
end
6363
else
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
using MLJLinearModels
2+
using BenchmarkTools, Random, LinearAlgebra
3+
DO_COMPARISONS = false; include("../testutils.jl")
4+
5+
n, p = 50_000, 500
6+
((X, y, θ), (X_, y1, θ1)) = generate_continuous(n, p; seed=512, sparse=0.5)
7+
8+
# ======================== #
9+
# ELEMENTARIES
10+
#
11+
# > apply_X! ✅ Sept 13, 2019
12+
# > apply_Xt! ✅ Sept 13, 2019
13+
# > get_residuals! ✅ Sept 13, 2019
14+
# ======================== #
15+
16+
# ------------
17+
# No intercept
18+
19+
R.allocate(n, p)
20+
21+
= similar(y)
22+
Xtθ = similar(θ)
23+
r = R.SCRATCH_N[]
24+
25+
# Sept 13, 2019 :: 7.83 ms (0 allocations: 0 bytes)
26+
@btime R.apply_X!($Xθ, $X, $θ);
27+
28+
# Sept 13, 2019 :: 7.94 ms (0 allocations: 0 bytes) [pretty much only apply_X!]
29+
@btime R.get_residuals!($r, $X, $θ, $y);
30+
31+
# Sept 13, 2019 :: 7.9 ms (0 allocations: 0 bytes)
32+
@btime R.apply_Xt!($Xtθ, $X, $y);
33+
34+
# --------------
35+
# With intercept
36+
37+
R.allocate(n, p+1)
38+
39+
= similar(y)
40+
Xtθ = similar(θ1)
41+
r = R.SCRATCH_N[]
42+
43+
# Sept 13, 2019 :: 7.949 ms (1 allocation: 48 bytes) -- alloc for the view
44+
@btime R.apply_X!($Xθ, $X, $θ1);
45+
46+
# Sept 13, 2019 :: 7.955 ms (1 allocation: 48 bytes) -- mostly apply_X!
47+
@btime R.get_residuals!($r, $X, $θ1, $y);
48+
49+
# Sept 13, 2019 :: 7.883 ms (1 allocation: 48 bytes)
50+
@btime R.apply_Xt!($Xtθ, $X, $y);

test/benchmarks/ridge-lasso.jl

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,74 @@
1-
# WIP WIP !
2-
31
using MLJLinearModels
42
using BenchmarkTools, Random, LinearAlgebra
53
DO_COMPARISONS = false; include("../testutils.jl")
64

75
n, p = 50_000, 500
6+
((X, y, θ), (X_, y1, θ1)) = generate_continuous(n, p; seed=512, sparse=0.5)
7+
8+
# =============== #
9+
# RIDGE FUNCTIONS #
10+
#
11+
# Hv! ✅
12+
# =============== #
13+
14+
# No fit_intercept
15+
R.allocate(n, p)
16+
ridge = RidgeRegression(0.5; fit_intercept=false)
17+
Hv! = R.Hv!(ridge, X, y)
18+
v = randn(p)
19+
Hv = similar(v)
20+
21+
# Sept 13, 2019 :: 15.300 ms (0 allocations: 0 bytes)
22+
@btime Hv!($Hv, $v)
23+
24+
# With fit_intercept
25+
R.allocate(n, p+1)
26+
ridge = RidgeRegression(λ)
27+
Hv! = R.Hv!(ridge, X, y)
28+
v = randn(p+1)
29+
Hv = similar(v)
30+
31+
# Sept 13, 2019 :: 26.246 ms (5 allocations: 4.22 KiB)
32+
# (decrease in perf due to views, and need a sum on an array)
33+
@btime Hv!($Hv, $v)
34+
35+
# =============== #
36+
# LASSO FUNCTIONS #
37+
#
38+
# smooth_fg!
39+
# =============== #
40+
41+
# No fit_intercept
42+
R.allocate(n, p)
43+
44+
lasso = LassoRegression(0.5, fit_intercept=false)
45+
smooth_fg! = R.smooth_fg!(lasso, X, y)
46+
v = randn(p)
47+
g = similar(v)
48+
49+
# Sept 13, 2019 :: 15.508 ms (3 allocations: 390.72 KiB)
50+
@btime smooth_fg!($g, $v);
51+
52+
@btime R.get_residuals!($X, $θ, $y)
53+
54+
r = R.get_residuals!(X, θ, y)
55+
56+
@btime R.apply_Xt!(g, X, r)
57+
58+
@btime (g .+= 0.5 .* θ)
59+
60+
61+
62+
63+
64+
# =======
65+
=======
866
((X, _, _), (X_, y1, θ1)) = generate_continuous(n, p; seed=512, sparse=0.1)
967

1068
λ = 50
1169
lr = LassoRegression(λ)
1270
J = objective(lr, X, y1)
71+
>>>>>>> master
1372

1473
fista = FISTA()
1574
ista = ISTA()

0 commit comments

Comments
 (0)