2
2
# const LOOPVECBENCHDIR = joinpath(pkgdir("LoopVectorization"), "benchmarks")
3
3
# includet(joinpath(LOOPVECBENCHDIR, "driver.jl"))
4
4
5
- using Distributed, LoopVectorization, JLD2
6
-
5
+ using Distributed, LoopVectorization, JLD2, ProgressMeter
7
6
const LOOPVECBENCHDIR = joinpath (pkgdir (LoopVectorization), " benchmark" )
8
7
include (joinpath (LOOPVECBENCHDIR, " benchmarkflops.jl" ))
9
8
include (joinpath (LOOPVECBENCHDIR, " plotbenchmarks.jl" ))
10
9
11
10
12
- # nprocs_to_add() = (Sys.CPU_THREADS >> 1) - 1
13
- nprocs_to_add () = (Sys. CPU_THREADS >> 1 )
11
+ nprocs_to_add () = (( Sys. CPU_THREADS) :: Int >> 1 )
12
+ # nprocs_to_add() = (( Sys.CPU_THREADS)::Int >> 1) - 1
14
13
start_worker (wid) = remotecall (include, wid, joinpath (LOOPVECBENCHDIR, " setup_worker.jl" ))
15
- function start_workers ()
16
- addprocs (nprocs_to_add () )
17
- foreach (wait, map (start_worker, workers ()))
14
+ function start_workers (nprocs = nprocs_to_add () )
15
+ addprocs (nprocs, exeflags = " --project= $(Base . active_project ()) " )
16
+ foreach (wait, map (start_worker, workers ()))
18
17
end
19
18
stop_workers () = rmprocs (workers ())
20
19
@@ -29,36 +28,36 @@ function blastests()
29
28
end
30
29
function benchmark_AmulB (sizes)
31
30
tests = blastests ()
32
- start_workers ()
31
+ start_workers (nprocs_to_add () >> 1 )
33
32
sm = SharedMatrix (Matrix {Float64} (undef, length (tests), length (sizes)))
34
- pmap (is -> A_mul_B_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
33
+ @showprogress pmap (is -> A_mul_B_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
35
34
br = BenchmarkResult (Matrix (sm), tests, sizes)
36
35
stop_workers ()
37
36
br
38
37
end
39
38
function benchmark_AmulBt (sizes)
40
39
tests = blastests ()
41
- start_workers ()
40
+ start_workers (nprocs_to_add () >> 1 )
42
41
sm = SharedMatrix (Matrix {Float64} (undef, length (tests), length (sizes)))
43
- pmap (is -> A_mul_Bt_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
42
+ @showprogress pmap (is -> A_mul_Bt_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
44
43
br = BenchmarkResult (Matrix (sm), tests, sizes)
45
44
stop_workers ()
46
45
br
47
46
end
48
47
function benchmark_AtmulB (sizes)
49
48
tests = blastests ()
50
- start_workers ()
49
+ start_workers (nprocs_to_add () >> 1 )
51
50
sm = SharedMatrix (Matrix {Float64} (undef, length (tests), length (sizes)))
52
- pmap (is -> At_mul_B_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
51
+ @showprogress pmap (is -> At_mul_B_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
53
52
br = BenchmarkResult (Matrix (sm), tests, sizes)
54
53
stop_workers ()
55
54
br
56
55
end
57
56
function benchmark_AtmulBt (sizes)
58
57
tests = blastests ()
59
- start_workers ()
58
+ start_workers (nprocs_to_add () >> 1 )
60
59
sm = SharedMatrix (Matrix {Float64} (undef, length (tests), length (sizes)))
61
- pmap (is -> At_mul_Bt_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
60
+ @showprogress pmap (is -> At_mul_Bt_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
62
61
br = BenchmarkResult (Matrix (sm), tests, sizes)
63
62
stop_workers ()
64
63
br
@@ -73,7 +72,7 @@ function benchmark_dot(sizes)
73
72
tests = dot_tests ()
74
73
start_workers ()
75
74
sm = SharedMatrix (Matrix {Float64} (undef, length (tests), length (sizes)))
76
- pmap (is -> dot_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
75
+ @showprogress pmap (is -> dot_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
77
76
br = BenchmarkResult (Matrix (sm), tests, sizes)
78
77
stop_workers ()
79
78
br
@@ -82,7 +81,7 @@ function benchmark_selfdot(sizes)
82
81
tests = dot_tests ()
83
82
start_workers ()
84
83
sm = SharedMatrix (Matrix {Float64} (undef, length (tests), length (sizes)))
85
- pmap (is -> selfdot_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
84
+ @showprogress pmap (is -> selfdot_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
86
85
br = BenchmarkResult (Matrix (sm), tests, sizes)
87
86
stop_workers ()
88
87
br
@@ -91,7 +90,7 @@ function benchmark_Amulvb(sizes)
91
90
tests = blastests ()
92
91
start_workers ()
93
92
sm = SharedMatrix (Matrix {Float64} (undef, length (tests), length (sizes)))
94
- pmap (is -> A_mul_vb_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
93
+ @showprogress pmap (is -> A_mul_vb_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
95
94
br = BenchmarkResult (Matrix (sm), tests, sizes)
96
95
stop_workers ()
97
96
br
@@ -100,31 +99,31 @@ function benchmark_Atmulvb(sizes)
100
99
tests = blastests ()
101
100
start_workers ()
102
101
sm = SharedMatrix (Matrix {Float64} (undef, length (tests), length (sizes)))
103
- pmap (is -> At_mul_vb_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
102
+ @showprogress pmap (is -> At_mul_vb_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
104
103
br = BenchmarkResult (Matrix (sm), tests, sizes)
105
104
stop_workers ()
106
105
br
107
106
end
108
107
function benchmark_dot3 (sizes)
109
108
tests = [" LoopVectorization" , " Julia" , " Clang" , " GFortran" ]
110
109
INTEL_BENCH && push! (tests, " icc" , " ifort" )
111
- push! (test , " g++ & Eigen-3" , " clang++ & Eigen-3" , " LinearAlgebra" )
110
+ push! (tests , " g++ & Eigen-3" , " clang++ & Eigen-3" , " LinearAlgebra" )
112
111
start_workers ()
113
112
sm = SharedMatrix (Matrix {Float64} (undef, length (tests), length (sizes)))
114
- pmap (is -> dot3_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
113
+ @showprogress pmap (is -> dot3_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
115
114
br = BenchmarkResult (Matrix (sm), tests, sizes)
116
115
stop_workers ()
117
116
br
118
117
end
119
118
function benchmark_sse (sizes)
120
119
tests = [" LoopVectorization" , " Julia" , " Clang" , " GFortran" ]
121
120
INTEL_BENCH && push! (tests, " icc" , " ifort" )
122
- push! (test , " g++ & Eigen-3" , " clang++ & Eigen-3" , " MKL " )
121
+ push! (tests , " g++ & Eigen-3" , " clang++ & Eigen-3" )
123
122
MKL_BENCH && push! (tests, " MKL" )
124
123
125
124
start_workers ()
126
125
sm = SharedMatrix (Matrix {Float64} (undef, length (tests), length (sizes)))
127
- pmap (is -> sse_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
126
+ @showprogress pmap (is -> sse_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
128
127
br = BenchmarkResult (Matrix (sm), tests, sizes)
129
128
stop_workers ()
130
129
br
@@ -134,7 +133,7 @@ function benchmark_exp(sizes)
134
133
INTEL_BENCH && push! (tests, " icc" , " ifort" )
135
134
start_workers ()
136
135
sm = SharedMatrix (Matrix {Float64} (undef, length (tests), length (sizes)))
137
- pmap (is -> exp_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
136
+ @showprogress pmap (is -> exp_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
138
137
br = BenchmarkResult (Matrix (sm), tests, sizes)
139
138
stop_workers ()
140
139
br
@@ -145,7 +144,7 @@ function benchmark_aplusBc(sizes)
145
144
push! (tests, " g++ & Eigen-3" , " clang++ & Eigen-3" )
146
145
start_workers ()
147
146
sm = SharedMatrix (Matrix {Float64} (undef, length (tests), length (sizes)))
148
- pmap (is -> aplusBc_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
147
+ @showprogress pmap (is -> aplusBc_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
149
148
br = BenchmarkResult (Matrix (sm), tests, sizes)
150
149
stop_workers ()
151
150
br
@@ -157,7 +156,7 @@ function benchmark_AplusAt(sizes)
157
156
INTEL_BENCH && push! (tests, " ifort-builtin" )
158
157
start_workers ()
159
158
sm = SharedMatrix (Matrix {Float64} (undef, length (tests), length (sizes)))
160
- pmap (is -> AplusAt_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
159
+ @showprogress pmap (is -> AplusAt_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
161
160
br = BenchmarkResult (Matrix (sm), tests, sizes)
162
161
stop_workers ()
163
162
br
@@ -167,7 +166,7 @@ function benchmark_random_access(sizes)
167
166
INTEL_BENCH && push! (tests, " icc" , " ifort" )
168
167
start_workers ()
169
168
sm = SharedMatrix (Matrix {Float64} (undef, length (tests), length (sizes)))
170
- pmap (is -> randomaccess_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
169
+ @showprogress pmap (is -> randomaccess_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
171
170
br = BenchmarkResult (Matrix (sm), tests, sizes)
172
171
stop_workers ()
173
172
br
@@ -179,7 +178,7 @@ function benchmark_logdettriangle(sizes)
179
178
push! (tests, " LinearAlgebra" )
180
179
start_workers ()
181
180
sm = SharedMatrix (Matrix {Float64} (undef, length (tests), length (sizes)))
182
- pmap (is -> logdettriangle_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
181
+ @showprogress pmap (is -> logdettriangle_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
183
182
br = BenchmarkResult (Matrix (sm), tests, sizes)
184
183
stop_workers ()
185
184
br
@@ -189,7 +188,7 @@ function benchmark_filter2d(sizes, K)
189
188
INTEL_BENCH && push! (tests, " icc" , " ifort" )
190
189
start_workers ()
191
190
sm = SharedMatrix (Matrix {Float64} (undef, length (tests), length (sizes)))
192
- pmap (is -> filter2d_bench_run! (sm, is[2 ], is[1 ], K), enumerate (sizes))
191
+ @showprogress pmap (is -> filter2d_bench_run! (sm, is[2 ], is[1 ], K), enumerate (sizes))
193
192
br = BenchmarkResult (Matrix (sm), tests, sizes)
194
193
stop_workers ()
195
194
br
@@ -208,7 +207,7 @@ function benchmark_filter2dunrolled(sizes)
208
207
start_workers ()
209
208
sm = SharedMatrix (Matrix {Float64} (undef, length (tests), length (sizes)))
210
209
K = SizedOffsetMatrix {Float64,-1,1,-1,1} (rand (3 ,3 ))
211
- pmap (is -> filter2dunrolled_bench_run! (sm, is[2 ], is[1 ], K), enumerate (sizes))
210
+ @showprogress pmap (is -> filter2dunrolled_bench_run! (sm, is[2 ], is[1 ], K), enumerate (sizes))
212
211
br = BenchmarkResult (Matrix (sm), tests, sizes)
213
212
stop_workers ()
214
213
br
@@ -234,21 +233,22 @@ println("A' * b benchmark results:"); Atmulvb_bench = benchmark_Atmulvb(sizes);
234
233
println (" a' * b benchmark results:" ); dot_bench = benchmark_dot (longsizes); println (dot_bench)
235
234
println (" a' * a benchmark results:" ); selfdot_bench = benchmark_selfdot (longsizes); println (selfdot_bench)
236
235
237
- println (" Benchmark resutls of summing squared error:" ); sse_bench = benchmark_sse (sizes); println (sse_bench)
238
236
println (" Benchmark results of a .+ B .* c':" ); aplusBc_bench = benchmark_aplusBc (sizes); println (aplusBc_bench)
239
237
println (" Benchmark results of A .+ A':" ); AplusAt_bench = benchmark_AplusAt (sizes); println (AplusAt_bench)
240
238
241
239
println (" Benchmark results for dynamically sized 3x3 convolution:" ); filter2d_dynamic_bench = benchmark_filter2ddynamic (sizes); println (filter2d_dynamic_bench)
242
240
println (" Benchmark results for statically sized 3x3 convolution:" ); filter2d_3x3_bench = benchmark_filter2d3x3 (sizes); println (filter2d_3x3_bench)
243
241
println (" Benchmark results for unrolled 3x3 convolution:" ); filter2d_unrolled_bench = benchmark_filter2dunrolled (sizes); println (filter2d_unrolled_bench)
244
242
243
+ println (" Benchmark resutls of summing squared error:" ); sse_bench = benchmark_sse (sizes); println (sse_bench)
245
244
println (" Benchmark results of exponentiating a vector:" ); vexp_bench = benchmark_exp (sizes); println (vexp_bench)
246
245
println (" Benchmark results from using a vector of indices:" ); randomaccess_bench = benchmark_random_access (sizes); println (randomaccess_bench)
247
246
248
247
const v = 2
249
- using Cairo, Fontconfig
248
+ # using Cairo, Fontconfig
250
249
const PICTURES = joinpath (pkgdir (LoopVectorization), " docs" , " src" , " assets" )
251
- saveplot (f, br) = draw (PNG (joinpath (PICTURES, f * " $v .png" ), 12 inch, 8 inch), plot (br))
250
+ # saveplot(f, br) = draw(PNG(joinpath(PICTURES, f * "$v.png"), 12inch, 8inch), plot(br))
251
+ saveplot (f, br) = draw (SVG (joinpath (PICTURES, f * " $v .svg" ), 12 inch, 8 inch), plot (br))
252
252
253
253
# If only rerunning a few, remove them from load.
254
254
# @load "benchmarkresults.jld2" logdettriangle_bench filter2d_dynamic_bench filter2d_3x3_bench filter2d_unrolled_bench dot_bench selfdot_bench dot3_bench sse_bench aplusBc_bench AplusAt_bench vexp_bench randomaccess_bench AmulB_bench AmulBt_bench AtmulB_bench AtmulBt_bench Amulvb_bench Atmulvb_bench
0 commit comments