Skip to content

Commit 7a76bf1

Browse files
committed
Add benchmark workaround for BenchmarkTools memory leak.
1 parent e2da0d5 commit 7a76bf1

File tree

2 files changed

+142
-139
lines changed

2 files changed

+142
-139
lines changed

benchmark/benchmarkflops.jl

Lines changed: 0 additions & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -103,39 +103,6 @@ function At_mul_Bt_bench!(br, s, i)
103103
matmul_bench!(br, C, A, B, i)
104104
end
105105

106-
blastests() = [
107-
"LoopVectorization",
108-
"Julia", "Clang",
109-
"GFortran", "icc", "ifort",
110-
"g++ & Eigen-3", "clang++ & Eigen-3",
111-
"GFortran-builtin", "ifort-builtin",
112-
"OpenBLAS", "MKL"
113-
]
114-
115-
function benchmark_AmulB(sizes)
116-
br = BenchmarkResult(blastests(), sizes)
117-
sm = br.sizedresults.results
118-
pmap(is -> A_mul_B_bench!(sm, is[2], is[1]), enumerate(sizes))
119-
br
120-
end
121-
function benchmark_AmulBt(sizes)
122-
br = BenchmarkResult(blastests(), sizes)
123-
sm = br.sizedresults.results
124-
pmap(is -> A_mul_Bt_bench!(sm, is[2], is[1]), enumerate(sizes))
125-
br
126-
end
127-
function benchmark_AtmulB(sizes)
128-
br = BenchmarkResult(blastests(), sizes)
129-
sm = br.sizedresults.results
130-
pmap(is -> At_mul_B_bench!(sm, is[2], is[1]), enumerate(sizes))
131-
br
132-
end
133-
function benchmark_AtmulBt(sizes)
134-
br = BenchmarkResult(blastests(), sizes)
135-
sm = br.sizedresults.results
136-
pmap(is -> At_mul_Bt_bench!(sm, is[2], is[1]), enumerate(sizes))
137-
br
138-
end
139106

140107
function dot_bench!(br, s, i)
141108
a = rand(s); b = rand(s);
@@ -159,13 +126,6 @@ function dot_bench!(br, s, i)
159126
@assert iedot(a,b) ≈ dotblas "i-eigen dot wrong?"
160127
# br[9,i] = n_gflop / @belapsed dot($a, $b)
161128
end
162-
function benchmark_dot(sizes)
163-
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "g++ & Eigen-3", "clang++ & Eigen-3"]#, "OpenBLAS"]
164-
br = BenchmarkResult(tests, sizes)
165-
sm = br.sizedresults.results
166-
pmap(is -> dot_bench!(sm, is[2], is[1]), enumerate(sizes))
167-
br
168-
end
169129
function selfdot_bench!(br, s, i)
170130
a = rand(s); b = rand(s);
171131
dotblas = dot(a, a)
@@ -188,13 +148,6 @@ function selfdot_bench!(br, s, i)
188148
@assert ieselfdot(a) ≈ dotblas "i-eigen dot wrong?"
189149
# br[9,i] = n_gflop / @belapsed dot($a, $a)
190150
end
191-
function benchmark_selfdot(sizes)
192-
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "g++ & Eigen-3", "clang++ & Eigen-3"]#, "OpenBLAS"]
193-
br = BenchmarkResult(tests, sizes)
194-
sm = br.sizedresults.results
195-
pmap(is -> selfdot_bench!(sm, is[2], is[1]), enumerate(sizes))
196-
br
197-
end
198151

199152
totwotuple(i::Int) = (i,i)
200153
totwotuple(i::Tuple{Int,Int}) = i
@@ -241,18 +194,6 @@ function At_mul_vb_bench!(br, s, i)
241194
y = rand(N);
242195
gemv_bench!(br, x, A, y, i)
243196
end
244-
function benchmark_Amulvb(sizes)
245-
br = BenchmarkResult(blastests(), sizes)
246-
sm = br.sizedresults.results
247-
pmap(is -> A_mul_vb_bench!(sm, is[2], is[1]), enumerate(sizes))
248-
br
249-
end
250-
function benchmark_Atmulvb(sizes)
251-
br = BenchmarkResult(blastests(), sizes)
252-
sm = br.sizedresults.results
253-
pmap(is -> At_mul_vb_bench!(sm, is[2], is[1]), enumerate(sizes))
254-
br
255-
end
256197

257198
function dot3_bench!(br, s, i)
258199
M, N = totwotuple(s)
@@ -277,13 +218,6 @@ function dot3_bench!(br, s, i)
277218
@assert iedot3(x, A, y) ≈ dotblas "c-eigen dot wrong?"
278219
br[9,i] = n_gflop / @belapsed dot($x, $A, $y)
279220
end
280-
function benchmark_dot3(sizes)
281-
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "g++ & Eigen-3", "clang++ & Eigen-3", "LinearAlgebra" ]
282-
br = BenchmarkResult(tests, sizes)
283-
sm = br.sizedresults.results
284-
pmap(is -> dot3_bench!(sm, is[2], is[1]), enumerate(sizes))
285-
br
286-
end
287221
# BLAS.set_num_threads(1)
288222
function sse!(Xβ, y, X, β)
289223
dgemvmkl!(copyto!(Xβ, y), X, β, 1.0, -1.0)
@@ -317,13 +251,6 @@ function sse_bench!(br, s, i)
317251
@assert ieOLSlp(y, X, β) ≈ lpblas "i-eigen wrong?"
318252
br[9,i] = n_gflop / @belapsed sse!($Xβ, $y, $X, $β)
319253
end
320-
function benchmark_sse(sizes)
321-
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "g++ & Eigen-3", "clang++ & Eigen-3", "MKL"]
322-
br = BenchmarkResult(tests, sizes)
323-
sm = br.sizedresults.results
324-
pmap(is -> sse_bench!(sm, is[2], is[1]), enumerate(sizes))
325-
br
326-
end
327254

328255
function exp_bench!(br, s, i)
329256
a = rand(s); b = similar(a)
@@ -341,13 +268,6 @@ function exp_bench!(br, s, i)
341268
br[6,i] = n_gflop / @belapsed ifvexp!($b, $a)
342269
@assert b ≈ baseb "ifort wrong?"
343270
end
344-
function benchmark_exp(sizes)
345-
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort"]
346-
br = BenchmarkResult(tests, sizes)
347-
sm = br.sizedresults.results
348-
pmap(is -> exp_bench!(sm, is[2], is[1]), enumerate(sizes))
349-
br
350-
end
351271

352272
function aplusBc_bench!(br, s, i)
353273
M, N = totwotuple(s)
@@ -371,13 +291,6 @@ function aplusBc_bench!(br, s, i)
371291
br[8,i] = n_gflop / @belapsed ieaplusBc!($D, $a, $B, $c)
372292
@assert D ≈ Dcopy "i-eigen wrong?"; fill!(D, NaN);
373293
end
374-
function benchmark_aplusBc(sizes)
375-
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "g++ & Eigen-3", "clang++ & Eigen-3"]
376-
br = BenchmarkResult(tests, sizes)
377-
sm = br.sizedresults.results
378-
pmap(is -> aplusBc_bench!(sm, is[2], is[1]), enumerate(sizes))
379-
br
380-
end
381294

382295
function AplusAt_bench!(br, s, i)
383296
A = rand(s,s); B = similar(A)
@@ -403,13 +316,6 @@ function AplusAt_bench!(br, s, i)
403316
br[10,i] = n_gflop / @belapsed ifAplusAt_builtin!($B, $A)
404317
@assert B ≈ baseB "ifort-builtin wrong?"; fill!(B, NaN);
405318
end
406-
function benchmark_AplusAt(sizes)
407-
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "g++ & Eigen-3", "clang++ & Eigen-3", "GFortran-builtin", "ifort-builtin"]
408-
br = BenchmarkResult(tests, sizes)
409-
sm = br.sizedresults.results
410-
pmap(is -> AplusAt_bench!(sm, is[2], is[1]), enumerate(sizes))
411-
br
412-
end
413319

414320
function randomaccess_bench!(br, s, i)
415321
A, C = totwotuple(s)
@@ -430,13 +336,6 @@ function randomaccess_bench!(br, s, i)
430336
br[6,i] = n_gflop / @belapsed ifrandomaccess($P, $basis, $coefs)
431337
@assert p ≈ ifrandomaccess(P, basis, coefs) "ifort wrong?"
432338
end
433-
function benchmark_random_access(sizes)
434-
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort"]
435-
br = BenchmarkResult(tests, sizes)
436-
sm = br.sizedresults.results
437-
pmap(is -> randomaccess_bench!(sm, is[2], is[1]), enumerate(sizes))
438-
br
439-
end
440339

441340
function logdettriangle_bench!(br, s, i)
442341
S = randn(s, 2s)
@@ -461,14 +360,6 @@ function logdettriangle_bench!(br, s, i)
461360
# @assert ld ≈ ielogdettriangle(U) "i-eigen wrong?"; fill!(B, NaN);
462361
br[7,i] = n_gflop / @belapsed logdet($U)
463362
end
464-
function benchmark_logdettriangle(sizes)
465-
# tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "g++ & Eigen-3", "clang++ & Eigen-3", "LinearAlgebra"]
466-
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "LinearAlgebra"]
467-
br = BenchmarkResult(tests, sizes)
468-
sm = br.sizedresults.results
469-
pmap(is -> logdettriangle_bench!(sm, is[2], is[1]), enumerate(sizes))
470-
br
471-
end
472363

473364

474365
function filter2d_bench_run!(br, s, i, K)
@@ -489,22 +380,7 @@ function filter2d_bench_run!(br, s, i, K)
489380
br[6,i] = n_gflop / @belapsed iffilter2d!($B, $A, $K)
490381
@assert B ≈ Bcopy "ifort wrong?"
491382
end
492-
function benchmark_filter2d(sizes, K)
493-
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort"]
494-
br = BenchmarkResult(tests, sizes)
495-
sm = br.sizedresults.results
496-
pmap(is -> filter2d_bench_run!(sm, is[2], is[1], K), enumerate(sizes))
497-
br
498-
end
499383

500-
function benchmark_filter2ddynamic(sizes)
501-
K = OffsetArray(rand(Float64, 3, 3), -1:1, -1:1)
502-
benchmark_filter2d(sizes, K)
503-
end
504-
function benchmark_filter2d3x3(sizes)
505-
K = SizedOffsetMatrix{Float64,-1,1,-1,1}(rand(3,3))
506-
benchmark_filter2d(sizes, K)
507-
end
508384

509385
function filter2dunrolled_bench_run!(br, s, i, K)
510386
A = rand(s + 2, s + 2)
@@ -524,11 +400,3 @@ function filter2dunrolled_bench_run!(br, s, i, K)
524400
br[6,i] = n_gflop / @belapsed iffilter2dunrolled!($B, $A, $K)
525401
@assert B ≈ Bcopy "ifort wrong?"
526402
end
527-
function benchmark_filter2dunrolled(sizes)
528-
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort"]
529-
br = BenchmarkResult(tests, sizes)
530-
sm = br.sizedresults.results
531-
K = SizedOffsetMatrix{Float64,-1,1,-1,1}(rand(3,3))
532-
pmap(is -> filter2dunrolled_bench_run!(sm, is[2], is[1], K), enumerate(sizes))
533-
br
534-
end

benchmark/driver.jl

Lines changed: 142 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,149 @@ include(joinpath(LOOPVECBENCHDIR, "benchmarkflops.jl"))
99
include(joinpath(LOOPVECBENCHDIR, "plotbenchmarks.jl"))
1010

1111

12-
addprocs((Sys.CPU_THREADS >> 1)-1); nworkers()
13-
14-
@everywhere begin
15-
using LoopVectorization
16-
const LOOPVECBENCHDIR = joinpath(pkgdir(LoopVectorization), "benchmark")
17-
include(joinpath(LOOPVECBENCHDIR, "benchmarkflops.jl"))
18-
# BenchmarkTools.DEFAULT_PARAMETERS.seconds = 1
12+
# Number of worker processes to add: half of `Sys.CPU_THREADS` (via a right
# shift by one), minus one.
function nprocs_to_add()
    half_threads = Sys.CPU_THREADS >> 1
    return half_threads - 1
end
13+
# Ask process `wid` to `include` the "setup_worker.jl" script located in
# `LOOPVECBENCHDIR`. The call is issued with `remotecall`, so the value
# returned here is whatever `remotecall` hands back, not the result of the
# include itself.
function start_worker(wid)
    setup_script = joinpath(LOOPVECBENCHDIR, "setup_worker.jl")
    return remotecall(include, wid, setup_script)
end
14+
# Add `nprocs_to_add()` processes, then issue `start_worker` on every process
# listed by `workers()` and block until each of those remote calls has
# finished. All setup calls are launched before any of them is waited on.
function start_workers()
    addprocs(nprocs_to_add())
    pending = [start_worker(wid) for wid in workers()]
    for setup_call in pending
        wait(setup_call)
    end
    return nothing
end
18+
# Remove every process currently reported by `workers()`.
function stop_workers()
    active = workers()
    return rmprocs(active)
end
19+
addprocs(); nworkers()
20+
21+
"""
    pmap_startstop(f, s)

Run `pmap(f, s)` on a freshly started set of workers and return its result.

Workers are started with `start_workers()` before the map and removed with
`stop_workers()` afterwards. The removal sits in a `finally` clause so that it
also happens when worker setup or the `pmap` call throws; otherwise the added
processes would stay alive after the error. Any exception is propagated to the
caller unchanged.
"""
function pmap_startstop(f, s)
    try
        start_workers()
        return pmap(f, s)
    finally
        # Always tear the workers down, even on failure.
        stop_workers()
    end
end
22+
23+
blastests() = [
24+
"LoopVectorization",
25+
"Julia", "Clang",
26+
"GFortran", "icc", "ifort",
27+
"g++ & Eigen-3", "clang++ & Eigen-3",
28+
"GFortran-builtin", "ifort-builtin",
29+
"OpenBLAS", "MKL"
30+
]
31+
function benchmark_AmulB(sizes)
32+
br = BenchmarkResult(blastests(), sizes)
33+
sm = br.sizedresults.results
34+
pmap_startstop(is -> A_mul_B_bench!(sm, is[2], is[1]), enumerate(sizes))
35+
br
36+
end
37+
function benchmark_AmulBt(sizes)
38+
br = BenchmarkResult(blastests(), sizes)
39+
sm = br.sizedresults.results
40+
pmap_startstop(is -> A_mul_Bt_bench!(sm, is[2], is[1]), enumerate(sizes))
41+
br
42+
end
43+
function benchmark_AtmulB(sizes)
44+
br = BenchmarkResult(blastests(), sizes)
45+
sm = br.sizedresults.results
46+
pmap_startstop(is -> At_mul_B_bench!(sm, is[2], is[1]), enumerate(sizes))
47+
br
48+
end
49+
function benchmark_AtmulBt(sizes)
50+
br = BenchmarkResult(blastests(), sizes)
51+
sm = br.sizedresults.results
52+
pmap_startstop(is -> At_mul_Bt_bench!(sm, is[2], is[1]), enumerate(sizes))
53+
br
54+
end
55+
function benchmark_dot(sizes)
56+
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "g++ & Eigen-3", "clang++ & Eigen-3"]#, "OpenBLAS"]
57+
br = BenchmarkResult(tests, sizes)
58+
sm = br.sizedresults.results
59+
pmap_startstop(is -> dot_bench!(sm, is[2], is[1]), enumerate(sizes))
60+
br
61+
end
62+
function benchmark_selfdot(sizes)
63+
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "g++ & Eigen-3", "clang++ & Eigen-3"]#, "OpenBLAS"]
64+
br = BenchmarkResult(tests, sizes)
65+
sm = br.sizedresults.results
66+
pmap_startstop(is -> selfdot_bench!(sm, is[2], is[1]), enumerate(sizes))
67+
br
68+
end
69+
function benchmark_Amulvb(sizes)
70+
br = BenchmarkResult(blastests(), sizes)
71+
sm = br.sizedresults.results
72+
pmap_startstop(is -> A_mul_vb_bench!(sm, is[2], is[1]), enumerate(sizes))
73+
br
74+
end
75+
function benchmark_Atmulvb(sizes)
76+
br = BenchmarkResult(blastests(), sizes)
77+
sm = br.sizedresults.results
78+
pmap_startstop(is -> At_mul_vb_bench!(sm, is[2], is[1]), enumerate(sizes))
79+
br
80+
end
81+
function benchmark_dot3(sizes)
82+
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "g++ & Eigen-3", "clang++ & Eigen-3", "LinearAlgebra" ]
83+
br = BenchmarkResult(tests, sizes)
84+
sm = br.sizedresults.results
85+
pmap_startstop(is -> dot3_bench!(sm, is[2], is[1]), enumerate(sizes))
86+
br
87+
end
88+
function benchmark_sse(sizes)
89+
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "g++ & Eigen-3", "clang++ & Eigen-3", "MKL"]
90+
br = BenchmarkResult(tests, sizes)
91+
sm = br.sizedresults.results
92+
pmap_startstop(is -> sse_bench!(sm, is[2], is[1]), enumerate(sizes))
93+
br
1994
end
95+
function benchmark_exp(sizes)
96+
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort"]
97+
br = BenchmarkResult(tests, sizes)
98+
sm = br.sizedresults.results
99+
pmap_startstop(is -> exp_bench!(sm, is[2], is[1]), enumerate(sizes))
100+
br
101+
end
102+
function benchmark_aplusBc(sizes)
103+
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "g++ & Eigen-3", "clang++ & Eigen-3"]
104+
br = BenchmarkResult(tests, sizes)
105+
sm = br.sizedresults.results
106+
pmap_startstop(is -> aplusBc_bench!(sm, is[2], is[1]), enumerate(sizes))
107+
br
108+
end
109+
function benchmark_AplusAt(sizes)
110+
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "g++ & Eigen-3", "clang++ & Eigen-3", "GFortran-builtin", "ifort-builtin"]
111+
br = BenchmarkResult(tests, sizes)
112+
sm = br.sizedresults.results
113+
pmap_startstop(is -> AplusAt_bench!(sm, is[2], is[1]), enumerate(sizes))
114+
br
115+
end
116+
function benchmark_random_access(sizes)
117+
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort"]
118+
br = BenchmarkResult(tests, sizes)
119+
sm = br.sizedresults.results
120+
pmap_startstop(is -> randomaccess_bench!(sm, is[2], is[1]), enumerate(sizes))
121+
br
122+
end
123+
function benchmark_logdettriangle(sizes)
124+
# tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "g++ & Eigen-3", "clang++ & Eigen-3", "LinearAlgebra"]
125+
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "LinearAlgebra"]
126+
br = BenchmarkResult(tests, sizes)
127+
sm = br.sizedresults.results
128+
pmap_startstop(is -> logdettriangle_bench!(sm, is[2], is[1]), enumerate(sizes))
129+
br
130+
end
131+
function benchmark_filter2d(sizes, K)
132+
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort"]
133+
br = BenchmarkResult(tests, sizes)
134+
sm = br.sizedresults.results
135+
pmap_startstop(is -> filter2d_bench_run!(sm, is[2], is[1], K), enumerate(sizes))
136+
br
137+
end
138+
function benchmark_filter2ddynamic(sizes)
139+
K = OffsetArray(rand(Float64, 3, 3), -1:1, -1:1)
140+
benchmark_filter2d(sizes, K)
141+
end
142+
function benchmark_filter2d3x3(sizes)
143+
K = SizedOffsetMatrix{Float64,-1,1,-1,1}(rand(3,3))
144+
benchmark_filter2d(sizes, K)
145+
end
146+
function benchmark_filter2dunrolled(sizes)
147+
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort"]
148+
br = BenchmarkResult(tests, sizes)
149+
sm = br.sizedresults.results
150+
K = SizedOffsetMatrix{Float64,-1,1,-1,1}(rand(3,3))
151+
pmap_startstop(is -> filter2dunrolled_bench_run!(sm, is[2], is[1], K), enumerate(sizes))
152+
br
153+
end
154+
20155

21156

22157
# sizes = 23:23

0 commit comments

Comments
 (0)