@@ -16,9 +16,6 @@ function start_workers()
16
16
foreach (wait, map (start_worker, workers ()))
17
17
end
18
18
"""
    stop_workers()

Hand the current `workers()` list to `rmprocs` and return what `rmprocs` returns.
"""
function stop_workers()
    return rmprocs(workers())
end
19
- addprocs (); nworkers ()
20
-
21
- pmap_startstop (f, s) = (start_workers (); r = pmap (f, s); stop_workers (); r)
22
19
23
20
blastests () = [
24
21
" LoopVectorization" ,
@@ -29,110 +26,142 @@ blastests() = [
29
26
" OpenBLAS" , " MKL"
30
27
]
31
28
"""
    benchmark_AmulB(sizes)

Call `A_mul_B_bench!(sm, sz, i)` through `pmap` for every `(i, sz)` in
`enumerate(sizes)`, where `sm` is a shared `Float64` matrix with one row per
entry of `blastests()` and one column per entry of `sizes`. Return
`BenchmarkResult(Matrix(sm), blastests(), sizes)`.

`start_workers()` is called first; `stop_workers()` runs before returning,
including when a benchmark call throws.
"""
function benchmark_AmulB(sizes)
    # Bind the test names once: they size the result matrix and label the result.
    tests = blastests()
    start_workers()
    try
        # Allocated after `start_workers()`, matching the original statement order.
        sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
        # NOTE(review): presumably each call fills column `i` of `sm` -- confirm
        # against A_mul_B_bench!.
        pmap(enumerate(sizes)) do (i, sz)
            A_mul_B_bench!(sm, sz, i)
        end
        # Copy into a plain Matrix before the workers are removed.
        return BenchmarkResult(Matrix(sm), tests, sizes)
    finally
        stop_workers()
    end
end
37
36
"""
    benchmark_AmulBt(sizes)

Call `A_mul_Bt_bench!(sm, sz, i)` through `pmap` for every `(i, sz)` in
`enumerate(sizes)`, where `sm` is a shared `Float64` matrix with one row per
entry of `blastests()` and one column per entry of `sizes`. Return
`BenchmarkResult(Matrix(sm), blastests(), sizes)`.

`start_workers()` is called first; `stop_workers()` runs before returning,
including when a benchmark call throws.
"""
function benchmark_AmulBt(sizes)
    # Bind the test names once: they size the result matrix and label the result.
    tests = blastests()
    start_workers()
    try
        # Allocated after `start_workers()`, matching the original statement order.
        sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
        # NOTE(review): presumably each call fills column `i` of `sm` -- confirm
        # against A_mul_Bt_bench!.
        pmap(enumerate(sizes)) do (i, sz)
            A_mul_Bt_bench!(sm, sz, i)
        end
        # Copy into a plain Matrix before the workers are removed.
        return BenchmarkResult(Matrix(sm), tests, sizes)
    finally
        stop_workers()
    end
end
43
44
"""
    benchmark_AtmulB(sizes)

Call `At_mul_B_bench!(sm, sz, i)` through `pmap` for every `(i, sz)` in
`enumerate(sizes)`, where `sm` is a shared `Float64` matrix with one row per
entry of `blastests()` and one column per entry of `sizes`. Return
`BenchmarkResult(Matrix(sm), blastests(), sizes)`.

`start_workers()` is called first; `stop_workers()` runs before returning,
including when a benchmark call throws.
"""
function benchmark_AtmulB(sizes)
    # Bind the test names once: they size the result matrix and label the result.
    tests = blastests()
    start_workers()
    try
        # Allocated after `start_workers()`, matching the original statement order.
        sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
        # NOTE(review): presumably each call fills column `i` of `sm` -- confirm
        # against At_mul_B_bench!.
        pmap(enumerate(sizes)) do (i, sz)
            At_mul_B_bench!(sm, sz, i)
        end
        # Copy into a plain Matrix before the workers are removed.
        return BenchmarkResult(Matrix(sm), tests, sizes)
    finally
        stop_workers()
    end
end
49
52
"""
    benchmark_AtmulBt(sizes)

Call `At_mul_Bt_bench!(sm, sz, i)` through `pmap` for every `(i, sz)` in
`enumerate(sizes)`, where `sm` is a shared `Float64` matrix with one row per
entry of `blastests()` and one column per entry of `sizes`. Return
`BenchmarkResult(Matrix(sm), blastests(), sizes)`.

`start_workers()` is called first; `stop_workers()` runs before returning,
including when a benchmark call throws.
"""
function benchmark_AtmulBt(sizes)
    # Bind the test names once: they size the result matrix and label the result.
    tests = blastests()
    start_workers()
    try
        # Allocated after `start_workers()`, matching the original statement order.
        sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
        # NOTE(review): presumably each call fills column `i` of `sm` -- confirm
        # against At_mul_Bt_bench!.
        pmap(enumerate(sizes)) do (i, sz)
            At_mul_Bt_bench!(sm, sz, i)
        end
        # Copy into a plain Matrix before the workers are removed.
        return BenchmarkResult(Matrix(sm), tests, sizes)
    finally
        stop_workers()
    end
end
55
60
"""
    benchmark_dot(sizes)

Call `dot_bench!(sm, sz, i)` through `pmap` for every `(i, sz)` in
`enumerate(sizes)`, where `sm` is a shared `Float64` matrix with one row per
test name and one column per entry of `sizes`. Return
`BenchmarkResult(Matrix(sm), tests, sizes)`.

`start_workers()` is called first; `stop_workers()` runs before returning,
including when a benchmark call throws.
"""
function benchmark_dot(sizes)
    tests = [
        " LoopVectorization", " Julia", " Clang", " GFortran", " icc", " ifort",
        " g++ & Eigen-3", " clang++ & Eigen-3",
    ] # , "OpenBLAS"]
    start_workers()
    try
        # Allocated after `start_workers()`, matching the original statement order.
        sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
        # NOTE(review): presumably each call fills column `i` of `sm` -- confirm
        # against dot_bench!.
        pmap(enumerate(sizes)) do (i, sz)
            dot_bench!(sm, sz, i)
        end
        # Copy into a plain Matrix before the workers are removed.
        return BenchmarkResult(Matrix(sm), tests, sizes)
    finally
        # Previously skipped when a benchmark threw, leaving workers running.
        stop_workers()
    end
end
62
69
"""
    benchmark_selfdot(sizes)

Call `selfdot_bench!(sm, sz, i)` through `pmap` for every `(i, sz)` in
`enumerate(sizes)`, where `sm` is a shared `Float64` matrix with one row per
test name and one column per entry of `sizes`. Return
`BenchmarkResult(Matrix(sm), tests, sizes)`.

`start_workers()` is called first; `stop_workers()` runs before returning,
including when a benchmark call throws.
"""
function benchmark_selfdot(sizes)
    tests = [
        " LoopVectorization", " Julia", " Clang", " GFortran", " icc", " ifort",
        " g++ & Eigen-3", " clang++ & Eigen-3",
    ] # , "OpenBLAS"]
    start_workers()
    try
        # Allocated after `start_workers()`, matching the original statement order.
        sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
        # NOTE(review): presumably each call fills column `i` of `sm` -- confirm
        # against selfdot_bench!.
        pmap(enumerate(sizes)) do (i, sz)
            selfdot_bench!(sm, sz, i)
        end
        # Copy into a plain Matrix before the workers are removed.
        return BenchmarkResult(Matrix(sm), tests, sizes)
    finally
        # Previously skipped when a benchmark threw, leaving workers running.
        stop_workers()
    end
end
69
78
"""
    benchmark_Amulvb(sizes)

Call `A_mul_vb_bench!(sm, sz, i)` through `pmap` for every `(i, sz)` in
`enumerate(sizes)`, where `sm` is a shared `Float64` matrix with one row per
entry of `blastests()` and one column per entry of `sizes`. Return
`BenchmarkResult(Matrix(sm), blastests(), sizes)`.

`start_workers()` is called first; `stop_workers()` runs before returning,
including when a benchmark call throws.
"""
function benchmark_Amulvb(sizes)
    # Bind the test names once: they size the result matrix and label the result.
    tests = blastests()
    start_workers()
    try
        # Allocated after `start_workers()`, matching the original statement order.
        sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
        # NOTE(review): presumably each call fills column `i` of `sm` -- confirm
        # against A_mul_vb_bench!.
        pmap(enumerate(sizes)) do (i, sz)
            A_mul_vb_bench!(sm, sz, i)
        end
        # Copy into a plain Matrix before the workers are removed.
        return BenchmarkResult(Matrix(sm), tests, sizes)
    finally
        stop_workers()
    end
end
75
86
"""
    benchmark_Atmulvb(sizes)

Call `At_mul_vb_bench!(sm, sz, i)` through `pmap` for every `(i, sz)` in
`enumerate(sizes)`, where `sm` is a shared `Float64` matrix with one row per
entry of `blastests()` and one column per entry of `sizes`. Return
`BenchmarkResult(Matrix(sm), blastests(), sizes)`.

`start_workers()` is called first; `stop_workers()` runs before returning,
including when a benchmark call throws.
"""
function benchmark_Atmulvb(sizes)
    # Bind the test names once: they size the result matrix and label the result.
    tests = blastests()
    start_workers()
    try
        # Allocated after `start_workers()`, matching the original statement order.
        sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
        # NOTE(review): presumably each call fills column `i` of `sm` -- confirm
        # against At_mul_vb_bench!.
        pmap(enumerate(sizes)) do (i, sz)
            At_mul_vb_bench!(sm, sz, i)
        end
        # Copy into a plain Matrix before the workers are removed.
        return BenchmarkResult(Matrix(sm), tests, sizes)
    finally
        stop_workers()
    end
end
81
94
"""
    benchmark_dot3(sizes)

Call `dot3_bench!(sm, sz, i)` through `pmap` for every `(i, sz)` in
`enumerate(sizes)`, where `sm` is a shared `Float64` matrix with one row per
test name and one column per entry of `sizes`. Return
`BenchmarkResult(Matrix(sm), tests, sizes)`.

`start_workers()` is called first; `stop_workers()` runs before returning,
including when a benchmark call throws.
"""
function benchmark_dot3(sizes)
    tests = [
        " LoopVectorization", " Julia", " Clang", " GFortran", " icc", " ifort",
        " g++ & Eigen-3", " clang++ & Eigen-3", " LinearAlgebra",
    ]
    start_workers()
    try
        # Allocated after `start_workers()`, matching the original statement order.
        sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
        # NOTE(review): presumably each call fills column `i` of `sm` -- confirm
        # against dot3_bench!.
        pmap(enumerate(sizes)) do (i, sz)
            dot3_bench!(sm, sz, i)
        end
        # Copy into a plain Matrix before the workers are removed.
        return BenchmarkResult(Matrix(sm), tests, sizes)
    finally
        # Previously skipped when a benchmark threw, leaving workers running.
        stop_workers()
    end
end
88
103
"""
    benchmark_sse(sizes)

Call `sse_bench!(sm, sz, i)` through `pmap` for every `(i, sz)` in
`enumerate(sizes)`, where `sm` is a shared `Float64` matrix with one row per
test name and one column per entry of `sizes`. Return
`BenchmarkResult(Matrix(sm), tests, sizes)`.

`start_workers()` is called first; `stop_workers()` runs before returning,
including when a benchmark call throws.
"""
function benchmark_sse(sizes)
    tests = [
        " LoopVectorization", " Julia", " Clang", " GFortran", " icc", " ifort",
        " g++ & Eigen-3", " clang++ & Eigen-3", " MKL",
    ]
    start_workers()
    try
        # Allocated after `start_workers()`, matching the original statement order.
        sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
        # NOTE(review): presumably each call fills column `i` of `sm` -- confirm
        # against sse_bench!.
        pmap(enumerate(sizes)) do (i, sz)
            sse_bench!(sm, sz, i)
        end
        # Copy into a plain Matrix before the workers are removed.
        return BenchmarkResult(Matrix(sm), tests, sizes)
    finally
        # Previously skipped when a benchmark threw, leaving workers running.
        stop_workers()
    end
end
95
112
"""
    benchmark_exp(sizes)

Call `exp_bench!(sm, sz, i)` through `pmap` for every `(i, sz)` in
`enumerate(sizes)`, where `sm` is a shared `Float64` matrix with one row per
test name and one column per entry of `sizes`. Return
`BenchmarkResult(Matrix(sm), tests, sizes)`.

`start_workers()` is called first; `stop_workers()` runs before returning,
including when a benchmark call throws.
"""
function benchmark_exp(sizes)
    tests = [" LoopVectorization", " Julia", " Clang", " GFortran", " icc", " ifort"]
    start_workers()
    try
        # Allocated after `start_workers()`, matching the original statement order.
        sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
        # NOTE(review): presumably each call fills column `i` of `sm` -- confirm
        # against exp_bench!.
        pmap(enumerate(sizes)) do (i, sz)
            exp_bench!(sm, sz, i)
        end
        # Copy into a plain Matrix before the workers are removed.
        return BenchmarkResult(Matrix(sm), tests, sizes)
    finally
        # Previously skipped when a benchmark threw, leaving workers running.
        stop_workers()
    end
end
102
121
"""
    benchmark_aplusBc(sizes)

Call `aplusBc_bench!(sm, sz, i)` through `pmap` for every `(i, sz)` in
`enumerate(sizes)`, where `sm` is a shared `Float64` matrix with one row per
test name and one column per entry of `sizes`. Return
`BenchmarkResult(Matrix(sm), tests, sizes)`.

`start_workers()` is called first; `stop_workers()` runs before returning,
including when a benchmark call throws.
"""
function benchmark_aplusBc(sizes)
    tests = [
        " LoopVectorization", " Julia", " Clang", " GFortran", " icc", " ifort",
        " g++ & Eigen-3", " clang++ & Eigen-3",
    ]
    start_workers()
    try
        # Allocated after `start_workers()`, matching the original statement order.
        sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
        # NOTE(review): presumably each call fills column `i` of `sm` -- confirm
        # against aplusBc_bench!.
        pmap(enumerate(sizes)) do (i, sz)
            aplusBc_bench!(sm, sz, i)
        end
        # Copy into a plain Matrix before the workers are removed.
        return BenchmarkResult(Matrix(sm), tests, sizes)
    finally
        # Previously skipped when a benchmark threw, leaving workers running.
        stop_workers()
    end
end
109
130
"""
    benchmark_AplusAt(sizes)

Call `AplusAt_bench!(sm, sz, i)` through `pmap` for every `(i, sz)` in
`enumerate(sizes)`, where `sm` is a shared `Float64` matrix with one row per
test name and one column per entry of `sizes`. Return
`BenchmarkResult(Matrix(sm), tests, sizes)`.

`start_workers()` is called first; `stop_workers()` runs before returning,
including when a benchmark call throws.
"""
function benchmark_AplusAt(sizes)
    tests = [
        " LoopVectorization", " Julia", " Clang", " GFortran", " icc", " ifort",
        " g++ & Eigen-3", " clang++ & Eigen-3", " GFortran-builtin", " ifort-builtin",
    ]
    start_workers()
    try
        # Allocated after `start_workers()`, matching the original statement order.
        sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
        # NOTE(review): presumably each call fills column `i` of `sm` -- confirm
        # against AplusAt_bench!.
        pmap(enumerate(sizes)) do (i, sz)
            AplusAt_bench!(sm, sz, i)
        end
        # Copy into a plain Matrix before the workers are removed.
        return BenchmarkResult(Matrix(sm), tests, sizes)
    finally
        # Previously skipped when a benchmark threw, leaving workers running.
        stop_workers()
    end
end
116
139
"""
    benchmark_random_access(sizes)

Call `randomaccess_bench!(sm, sz, i)` through `pmap` for every `(i, sz)` in
`enumerate(sizes)`, where `sm` is a shared `Float64` matrix with one row per
test name and one column per entry of `sizes`. Return
`BenchmarkResult(Matrix(sm), tests, sizes)`.

`start_workers()` is called first; `stop_workers()` runs before returning,
including when a benchmark call throws.
"""
function benchmark_random_access(sizes)
    tests = [" LoopVectorization", " Julia", " Clang", " GFortran", " icc", " ifort"]
    start_workers()
    try
        # Allocated after `start_workers()`, matching the original statement order.
        sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
        # NOTE(review): presumably each call fills column `i` of `sm` -- confirm
        # against randomaccess_bench!.
        pmap(enumerate(sizes)) do (i, sz)
            randomaccess_bench!(sm, sz, i)
        end
        # Copy into a plain Matrix before the workers are removed.
        return BenchmarkResult(Matrix(sm), tests, sizes)
    finally
        # Previously skipped when a benchmark threw, leaving workers running.
        stop_workers()
    end
end
123
148
"""
    benchmark_logdettriangle(sizes)

Call `logdettriangle_bench!(sm, sz, i)` through `pmap` for every `(i, sz)` in
`enumerate(sizes)`, where `sm` is a shared `Float64` matrix with one row per
test name and one column per entry of `sizes`. Return
`BenchmarkResult(Matrix(sm), tests, sizes)`.

`start_workers()` is called first; `stop_workers()` runs before returning,
including when a benchmark call throws.
"""
function benchmark_logdettriangle(sizes)
    # tests = ["LoopVectorization", "Julia", "Clang", "GFortran", "icc", "ifort", "g++ & Eigen-3", "clang++ & Eigen-3", "LinearAlgebra"]
    tests = [
        " LoopVectorization", " Julia", " Clang", " GFortran", " icc", " ifort",
        " LinearAlgebra",
    ]
    start_workers()
    try
        # Allocated after `start_workers()`, matching the original statement order.
        sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
        # NOTE(review): presumably each call fills column `i` of `sm` -- confirm
        # against logdettriangle_bench!.
        pmap(enumerate(sizes)) do (i, sz)
            logdettriangle_bench!(sm, sz, i)
        end
        # Copy into a plain Matrix before the workers are removed.
        return BenchmarkResult(Matrix(sm), tests, sizes)
    finally
        # Previously skipped when a benchmark threw, leaving workers running.
        stop_workers()
    end
end
131
158
"""
    benchmark_filter2d(sizes, K)

Call `filter2d_bench_run!(sm, sz, i, K)` through `pmap` for every `(i, sz)` in
`enumerate(sizes)`, where `sm` is a shared `Float64` matrix with one row per
test name and one column per entry of `sizes`; `K` is forwarded unchanged.
Return `BenchmarkResult(Matrix(sm), tests, sizes)`.

`start_workers()` is called first; `stop_workers()` runs before returning,
including when a benchmark call throws.
"""
function benchmark_filter2d(sizes, K)
    tests = [" LoopVectorization", " Julia", " Clang", " GFortran", " icc", " ifort"]
    start_workers()
    try
        # Allocated after `start_workers()`, matching the original statement order.
        sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
        # NOTE(review): presumably each call fills column `i` of `sm` -- confirm
        # against filter2d_bench_run!.
        pmap(enumerate(sizes)) do (i, sz)
            filter2d_bench_run!(sm, sz, i, K)
        end
        # Copy into a plain Matrix before the workers are removed.
        return BenchmarkResult(Matrix(sm), tests, sizes)
    finally
        # Previously skipped when a benchmark threw, leaving workers running.
        stop_workers()
    end
end
138
167
function benchmark_filter2ddynamic (sizes)
@@ -145,10 +174,12 @@ function benchmark_filter2d3x3(sizes)
145
174
end
146
175
"""
    benchmark_filter2dunrolled(sizes)

Build a random 3×3 `SizedOffsetMatrix{Float64,-1,1,-1,1}` kernel `K`, then call
`filter2dunrolled_bench_run!(sm, sz, i, K)` through `pmap` for every `(i, sz)`
in `enumerate(sizes)`, where `sm` is a shared `Float64` matrix with one row per
test name and one column per entry of `sizes`. Return
`BenchmarkResult(Matrix(sm), tests, sizes)`.

`start_workers()` is called first; `stop_workers()` runs before returning,
including when a benchmark call throws.
"""
function benchmark_filter2dunrolled(sizes)
    tests = [" LoopVectorization", " Julia", " Clang", " GFortran", " icc", " ifort"]
    start_workers()
    try
        # Allocated after `start_workers()`, matching the original statement order.
        sm = SharedMatrix(Matrix{Float64}(undef, length(tests), length(sizes)))
        K = SizedOffsetMatrix{Float64,-1,1,-1,1}(rand(3, 3))
        # NOTE(review): presumably each call fills column `i` of `sm` -- confirm
        # against filter2dunrolled_bench_run!.
        pmap(enumerate(sizes)) do (i, sz)
            filter2dunrolled_bench_run!(sm, sz, i, K)
        end
        # Copy into a plain Matrix before the workers are removed.
        return BenchmarkResult(Matrix(sm), tests, sizes)
    finally
        # Previously skipped when a benchmark threw, leaving workers running.
        stop_workers()
    end
end
154
185
@@ -171,6 +202,7 @@ Atmulvb_bench = benchmark_Atmulvb(sizes); println("A' * b benchmark results:");
171
202
172
203
# Run each benchmark over its size range and print a heading followed by the result.
dot_bench = benchmark_dot(longsizes); println(" a' * b benchmark results:"); println(dot_bench)
selfdot_bench = benchmark_selfdot(longsizes); println(" a' * a benchmark results:"); println(selfdot_bench)

sse_bench = benchmark_sse(sizes); println(" Benchmark results of summing squared error:"); println(sse_bench)
aplusBc_bench = benchmark_aplusBc(sizes); println(" Benchmark results of a .+ B .* c':"); println(aplusBc_bench)
# NOTE(review): the heading says `A * A'` although the benchmark is named AplusAt -- confirm
# which operation is intended before changing the label.
AplusAt_bench = benchmark_AplusAt(sizes); println(" Benchmark results of A * A':"); println(AplusAt_bench)
@@ -187,6 +219,10 @@ using Cairo, Fontconfig
187
219
const PICTURES = joinpath (pkgdir (LoopVectorization), " docs" , " src" , " assets" )
188
220
# saveplot(f, br): draw `plot(br)` into a PNG target sized 12 inch by 8 inch whose path is
# `PICTURES` joined with `f` concatenated with a suffix interpolating `v`.
# NOTE(review): `v` is not defined in the visible part of the file -- presumably a
# global version tag; confirm it is set before this is called.
saveplot (f, br) = draw (PNG (joinpath (PICTURES, f * " $v .png" ), 12 inch, 8 inch), plot (br))
189
221
222
+ # If only rerunning a few, remove them from load.
223
+ # @load "benchmarkresults.jld2" logdettriangle_bench filter2d_dynamic_bench filter2d_3x3_bench filter2d_unrolled_bench dot_bench selfdot_bench dot3_bench sse_bench aplusBc_bench AplusAt_bench vexp_bench randomaccess_bench AmulB_bench AmulBt_bench AtmulB_bench AtmulBt_bench Amulvb_bench Atmulvb_bench
224
+ @load " benchmarkresults.jld2" logdettriangle_bench filter2d_dynamic_bench filter2d_3x3_bench filter2d_unrolled_bench dot3_bench sse_bench aplusBc_bench AplusAt_bench vexp_bench randomaccess_bench AmulB_bench AmulBt_bench AtmulB_bench AtmulBt_bench Amulvb_bench Atmulvb_bench
225
+
190
226
saveplot (" bench_dot3_v" , dot3_bench);
191
227
saveplot (" bench_dot_v" , dot_bench);
192
228
saveplot (" bench_selfdot_v" , selfdot_bench);
@@ -207,7 +243,7 @@ saveplot("bench_filter2d_unrolled_v", filter2d_unrolled_bench);
207
243
saveplot (" bench_exp_v" , vexp_bench);
208
244
saveplot (" bench_random_access_v" , randomaccess_bench);
209
245
210
- # @load "benchmarkresults.jld2" logdettriangle_bench filter2d_dynamic_bench filter2d_3x3_bench filter2d_unrolled_bench vexp_bench randomaccess_bench
246
+
211
247
212
248
@save " benchmarkresults.jld2" logdettriangle_bench filter2d_dynamic_bench filter2d_3x3_bench filter2d_unrolled_bench dot_bench selfdot_bench dot3_bench sse_bench aplusBc_bench AplusAt_bench vexp_bench randomaccess_bench AmulB_bench AmulBt_bench AtmulB_bench AtmulBt_bench Amulvb_bench Atmulvb_bench
213
249
0 commit comments