Skip to content

Commit 5025e85

Browse files
committed
Test workaround for BenchmarkTools memory leak, delete src/zerl.jl.
1 parent 1dafd32 commit 5025e85

22 files changed

+95
-110
lines changed

benchmark/benchmarkflops.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ include(joinpath(LOOPVECBENCHDIR, "loadsharedlibs.jl"))
33

44
using BenchmarkTools, SharedArrays
55
struct SizedResults{V <: AbstractVector} <: AbstractMatrix{String}
6-
results::SharedMatrix{Float64}
6+
results::Matrix{Float64}
77
sizes::V
88
end
99
function Base.size(sr::SizedResults)
@@ -14,11 +14,11 @@ struct BenchmarkResult{V}
1414
tests::Vector{String}
1515
sizedresults::SizedResults{V}
1616
end
17-
function BenchmarkResult(tests, sizes)
17+
function BenchmarkResult(results, tests, sizes)
1818
ntests = length(tests); nsizes = length(sizes)
1919
BenchmarkResult(
2020
append!(["Size"], tests),
21-
SizedResults(SharedMatrix{Float64}(ntests, nsizes), sizes)
21+
SizedResults(results, sizes)
2222
)
2323
end
2424
function Base.getindex(br::SizedResults, row, col)
@@ -29,7 +29,7 @@ function Base.vcat(br1::BenchmarkResult, br2::BenchmarkResult)
2929
BenchmarkResult(
3030
br1.tests,
3131
SizedResults(
32-
SharedMatrix(hcat(br1.sizedresults.results, br2.sizedresults.results)),
32+
hcat(br1.sizedresults.results, br2.sizedresults.results),
3333
vcat(br1.sizedresults.sizes, br2.sizedresults.sizes)
3434
)
3535
)

benchmark/driver.jl

Lines changed: 91 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,6 @@ function start_workers()
1616
foreach(wait, map(start_worker, workers()))
1717
end
1818
stop_workers() = rmprocs(workers())
19-
addprocs(); nworkers()
20-
21-
pmap_startstop(f, s) = (start_workers(); r = pmap(f, s); stop_workers(); r)
2219

2320
blastests() = [
2421
"LoopVectorization",
@@ -29,110 +26,142 @@ blastests() = [
2926
"OpenBLAS", "MKL"
3027
]
3128
function benchmark_AmulB(sizes)
32-
br = BenchmarkResult(blastests(), sizes)
33-
sm = br.sizedresults.results
34-
pmap_startstop(is -> A_mul_B_bench!(sm, is[2], is[1]), enumerate(sizes))
29+
start_workers()
30+
sm = SharedMatrix(Matrix{Float64}(undef, length(blastests()), length(sizes))) # one row per blastests() label; no local `tests` exists in this function
31+
pmap(is -> A_mul_B_bench!(sm, is[2], is[1]), enumerate(sizes))
32+
br = BenchmarkResult(Matrix(sm), blastests(), sizes)
33+
stop_workers()
3534
br
3635
end
3736
function benchmark_AmulBt(sizes)
38-
br = BenchmarkResult(blastests(), sizes)
39-
sm = br.sizedresults.results
40-
pmap_startstop(is -> A_mul_Bt_bench!(sm, is[2], is[1]), enumerate(sizes))
37+
start_workers()
38+
sm = SharedMatrix(Matrix{Float64}(undef, length(blastests()), length(sizes))) # one row per blastests() label; no local `tests` exists in this function
39+
pmap(is -> A_mul_Bt_bench!(sm, is[2], is[1]), enumerate(sizes))
40+
br = BenchmarkResult(Matrix(sm), blastests(), sizes)
41+
stop_workers()
4142
br
4243
end
4344
function benchmark_AtmulB(sizes)
44-
br = BenchmarkResult(blastests(), sizes)
45-
sm = br.sizedresults.results
46-
pmap_startstop(is -> At_mul_B_bench!(sm, is[2], is[1]), enumerate(sizes))
45+
start_workers()
46+
sm = SharedMatrix(Matrix{Float64}(undef, length(blastests()), length(sizes))) # one row per blastests() label; no local `tests` exists in this function
47+
pmap(is -> At_mul_B_bench!(sm, is[2], is[1]), enumerate(sizes))
48+
br = BenchmarkResult(Matrix(sm), blastests(), sizes)
49+
stop_workers()
4750
br
4851
end
4952
function benchmark_AtmulBt(sizes)
50-
br = BenchmarkResult(blastests(), sizes)
51-
sm = br.sizedresults.results
52-
pmap_startstop(is -> At_mul_Bt_bench!(sm, is[2], is[1]), enumerate(sizes))
53+
start_workers()
54+
sm = SharedMatrix(Matrix{Float64}(undef, length(blastests()), length(sizes))) # one row per blastests() label; no local `tests` exists in this function
55+
pmap(is -> At_mul_Bt_bench!(sm, is[2], is[1]), enumerate(sizes))
56+
br = BenchmarkResult(Matrix(sm), blastests(), sizes)
57+
stop_workers()
5358
br
5459
end
5560
function benchmark_dot(sizes)
5661
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "g++ & Eigen-3", "clang++ & Eigen-3"]#, "OpenBLAS"]
57-
br = BenchmarkResult(tests, sizes)
58-
sm = br.sizedresults.results
59-
pmap_startstop(is -> dot_bench!(sm, is[2], is[1]), enumerate(sizes))
62+
start_workers()
63+
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
64+
pmap(is -> dot_bench!(sm, is[2], is[1]), enumerate(sizes))
65+
br = BenchmarkResult(Matrix(sm), tests, sizes)
66+
stop_workers()
6067
br
6168
end
6269
function benchmark_selfdot(sizes)
6370
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "g++ & Eigen-3", "clang++ & Eigen-3"]#, "OpenBLAS"]
64-
br = BenchmarkResult(tests, sizes)
65-
sm = br.sizedresults.results
66-
pmap_startstop(is -> selfdot_bench!(sm, is[2], is[1]), enumerate(sizes))
71+
start_workers()
72+
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
73+
pmap(is -> selfdot_bench!(sm, is[2], is[1]), enumerate(sizes))
74+
br = BenchmarkResult(Matrix(sm), tests, sizes)
75+
stop_workers()
6776
br
6877
end
6978
function benchmark_Amulvb(sizes)
70-
br = BenchmarkResult(blastests(), sizes)
71-
sm = br.sizedresults.results
72-
pmap_startstop(is -> A_mul_vb_bench!(sm, is[2], is[1]), enumerate(sizes))
79+
start_workers()
80+
sm = SharedMatrix(Matrix{Float64}(undef, length(blastests()), length(sizes))) # one row per blastests() label; no local `tests` exists in this function
81+
pmap(is -> A_mul_vb_bench!(sm, is[2], is[1]), enumerate(sizes))
82+
br = BenchmarkResult(Matrix(sm), blastests(), sizes)
83+
stop_workers()
7384
br
7485
end
7586
function benchmark_Atmulvb(sizes)
76-
br = BenchmarkResult(blastests(), sizes)
77-
sm = br.sizedresults.results
78-
pmap_startstop(is -> At_mul_vb_bench!(sm, is[2], is[1]), enumerate(sizes))
87+
start_workers()
88+
sm = SharedMatrix(Matrix{Float64}(undef, length(blastests()), length(sizes))) # one row per blastests() label; no local `tests` exists in this function
89+
pmap(is -> At_mul_vb_bench!(sm, is[2], is[1]), enumerate(sizes))
90+
br = BenchmarkResult(Matrix(sm), blastests(), sizes)
91+
stop_workers()
7992
br
8093
end
8194
function benchmark_dot3(sizes)
8295
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "g++ & Eigen-3", "clang++ & Eigen-3", "LinearAlgebra" ]
83-
br = BenchmarkResult(tests, sizes)
84-
sm = br.sizedresults.results
85-
pmap_startstop(is -> dot3_bench!(sm, is[2], is[1]), enumerate(sizes))
96+
start_workers()
97+
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
98+
pmap(is -> dot3_bench!(sm, is[2], is[1]), enumerate(sizes))
99+
br = BenchmarkResult(Matrix(sm), tests, sizes)
100+
stop_workers()
86101
br
87102
end
88103
function benchmark_sse(sizes)
89104
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "g++ & Eigen-3", "clang++ & Eigen-3", "MKL"]
90-
br = BenchmarkResult(tests, sizes)
91-
sm = br.sizedresults.results
92-
pmap_startstop(is -> sse_bench!(sm, is[2], is[1]), enumerate(sizes))
105+
start_workers()
106+
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
107+
pmap(is -> sse_bench!(sm, is[2], is[1]), enumerate(sizes))
108+
br = BenchmarkResult(Matrix(sm), tests, sizes)
109+
stop_workers()
93110
br
94111
end
95112
function benchmark_exp(sizes)
96113
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort"]
97-
br = BenchmarkResult(tests, sizes)
98-
sm = br.sizedresults.results
99-
pmap_startstop(is -> exp_bench!(sm, is[2], is[1]), enumerate(sizes))
114+
start_workers()
115+
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
116+
pmap(is -> exp_bench!(sm, is[2], is[1]), enumerate(sizes))
117+
br = BenchmarkResult(Matrix(sm), tests, sizes)
118+
stop_workers()
100119
br
101120
end
102121
function benchmark_aplusBc(sizes)
103122
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "g++ & Eigen-3", "clang++ & Eigen-3"]
104-
br = BenchmarkResult(tests, sizes)
105-
sm = br.sizedresults.results
106-
pmap_startstop(is -> aplusBc_bench!(sm, is[2], is[1]), enumerate(sizes))
123+
start_workers()
124+
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
125+
pmap(is -> aplusBc_bench!(sm, is[2], is[1]), enumerate(sizes))
126+
br = BenchmarkResult(Matrix(sm), tests, sizes)
127+
stop_workers()
107128
br
108129
end
109130
function benchmark_AplusAt(sizes)
110131
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "g++ & Eigen-3", "clang++ & Eigen-3", "GFortran-builtin", "ifort-builtin"]
111-
br = BenchmarkResult(tests, sizes)
112-
sm = br.sizedresults.results
113-
pmap_startstop(is -> AplusAt_bench!(sm, is[2], is[1]), enumerate(sizes))
132+
start_workers()
133+
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
134+
pmap(is -> AplusAt_bench!(sm, is[2], is[1]), enumerate(sizes))
135+
br = BenchmarkResult(Matrix(sm), tests, sizes)
136+
stop_workers()
114137
br
115138
end
116139
function benchmark_random_access(sizes)
117140
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort"]
118-
br = BenchmarkResult(tests, sizes)
119-
sm = br.sizedresults.results
120-
pmap_startstop(is -> randomaccess_bench!(sm, is[2], is[1]), enumerate(sizes))
141+
start_workers()
142+
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
143+
pmap(is -> randomaccess_bench!(sm, is[2], is[1]), enumerate(sizes))
144+
br = BenchmarkResult(Matrix(sm), tests, sizes)
145+
stop_workers()
121146
br
122147
end
123148
function benchmark_logdettriangle(sizes)
124149
# tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "g++ & Eigen-3", "clang++ & Eigen-3", "LinearAlgebra"]
125150
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "LinearAlgebra"]
126-
br = BenchmarkResult(tests, sizes)
127-
sm = br.sizedresults.results
128-
pmap_startstop(is -> logdettriangle_bench!(sm, is[2], is[1]), enumerate(sizes))
151+
start_workers()
152+
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
153+
pmap(is -> logdettriangle_bench!(sm, is[2], is[1]), enumerate(sizes))
154+
br = BenchmarkResult(Matrix(sm), tests, sizes)
155+
stop_workers()
129156
br
130157
end
131158
function benchmark_filter2d(sizes, K)
132159
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort"]
133-
br = BenchmarkResult(tests, sizes)
134-
sm = br.sizedresults.results
135-
pmap_startstop(is -> filter2d_bench_run!(sm, is[2], is[1], K), enumerate(sizes))
160+
start_workers()
161+
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
162+
pmap(is -> filter2d_bench_run!(sm, is[2], is[1], K), enumerate(sizes))
163+
br = BenchmarkResult(Matrix(sm), tests, sizes)
164+
stop_workers()
136165
br
137166
end
138167
function benchmark_filter2ddynamic(sizes)
@@ -145,10 +174,12 @@ function benchmark_filter2d3x3(sizes)
145174
end
146175
function benchmark_filter2dunrolled(sizes)
147176
tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort"]
148-
br = BenchmarkResult(tests, sizes)
149-
sm = br.sizedresults.results
177+
start_workers()
178+
sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
150179
K = SizedOffsetMatrix{Float64,-1,1,-1,1}(rand(3,3))
151-
pmap_startstop(is -> filter2dunrolled_bench_run!(sm, is[2], is[1], K), enumerate(sizes))
180+
pmap(is -> filter2dunrolled_bench_run!(sm, is[2], is[1], K), enumerate(sizes))
181+
br = BenchmarkResult(Matrix(sm), tests, sizes)
182+
stop_workers()
152183
br
153184
end
154185

@@ -171,6 +202,7 @@ Atmulvb_bench = benchmark_Atmulvb(sizes); println("A' * b benchmark results:");
171202

172203
dot_bench = benchmark_dot(longsizes); println("a' * b benchmark results:"); println(dot_bench)
173204
selfdot_bench = benchmark_selfdot(longsizes); println("a' * a benchmark results:"); println(selfdot_bench)
205+
174206
sse_bench = benchmark_sse(sizes); println("Benchmark results of summing squared error:"); println(sse_bench) # run the SSE benchmark and print its results
175207
aplusBc_bench = benchmark_aplusBc(sizes); println("Benchmark results of a .+ B .* c':"); println(aplusBc_bench)
176208
AplusAt_bench = benchmark_AplusAt(sizes); println("Benchmark results of A + A':"); println(AplusAt_bench) # label matches the AplusAt (sum with transpose) benchmark
@@ -187,6 +219,10 @@ using Cairo, Fontconfig
187219
const PICTURES = joinpath(pkgdir(LoopVectorization), "docs", "src", "assets")
188220
saveplot(f, br) = draw(PNG(joinpath(PICTURES, f * "$v.png"), 12inch, 8inch), plot(br))
189221

222+
# If only rerunning a few benchmarks, remove those variables from the @load list below so the saved results do not overwrite the fresh ones.
223+
# @load "benchmarkresults.jld2" logdettriangle_bench filter2d_dynamic_bench filter2d_3x3_bench filter2d_unrolled_bench dot_bench selfdot_bench dot3_bench sse_bench aplusBc_bench AplusAt_bench vexp_bench randomaccess_bench AmulB_bench AmulBt_bench AtmulB_bench AtmulBt_bench Amulvb_bench Atmulvb_bench
224+
@load "benchmarkresults.jld2" logdettriangle_bench filter2d_dynamic_bench filter2d_3x3_bench filter2d_unrolled_bench dot3_bench sse_bench aplusBc_bench AplusAt_bench vexp_bench randomaccess_bench AmulB_bench AmulBt_bench AtmulB_bench AtmulBt_bench Amulvb_bench Atmulvb_bench
225+
190226
saveplot("bench_dot3_v", dot3_bench);
191227
saveplot("bench_dot_v", dot_bench);
192228
saveplot("bench_selfdot_v", selfdot_bench);
@@ -207,7 +243,7 @@ saveplot("bench_filter2d_unrolled_v", filter2d_unrolled_bench);
207243
saveplot("bench_exp_v", vexp_bench);
208244
saveplot("bench_random_access_v", randomaccess_bench);
209245

210-
# @load "benchmarkresults.jld2" logdettriangle_bench filter2d_dynamic_bench filter2d_3x3_bench filter2d_unrolled_bench vexp_bench randomaccess_bench
246+
211247

212248
@save "benchmarkresults.jld2" logdettriangle_bench filter2d_dynamic_bench filter2d_3x3_bench filter2d_unrolled_bench dot_bench selfdot_bench dot3_bench sse_bench aplusBc_bench AplusAt_bench vexp_bench randomaccess_bench AmulB_bench AmulBt_bench AtmulB_bench AtmulBt_bench Amulvb_bench Atmulvb_bench
213249

docs/src/assets/bench_AmulB_v1.png

-12.2 KB
Loading

docs/src/assets/bench_AmulBt_v1.png

-24.6 KB
Loading

docs/src/assets/bench_Amulvb_v1.png

3.12 KB
Loading

docs/src/assets/bench_AplusAt_v1.png

-156 KB
Loading

docs/src/assets/bench_AtmulB_v1.png

-1.92 KB
Loading

docs/src/assets/bench_AtmulBt_v1.png

-21.6 KB
Loading

docs/src/assets/bench_Atmulvb_v1.png

41.7 KB
Loading

docs/src/assets/bench_aplusBc_v1.png

24.1 KB
Loading

0 commit comments

Comments
 (0)