@@ -31,8 +31,15 @@ function Base.getindex(br::SizedResults, row, col)
31
31
col == 1 ? string (br. sizes[row]) : string (br. results[col - 1 , row])
32
32
end
33
33
# Indexed assignment on a `BenchmarkResult` is forwarded to the `results` array held by
# its `sizedresults` field. As with any assignment expression, the stored value `v` is
# what the method evaluates to.
function Base.setindex!(br::BenchmarkResult, v, i...)
    return br.sizedresults.results[i...] = v
end
34
+
35
# Highlighter that colours a cell green when it holds the largest value of its row.
# The predicate receives the table data and the cell position `(row, col)`. Column 1 is
# never highlighted; for `col > 1` the cell corresponds to `data.results[col - 1, row]`
# and is compared against the maximum of `data.results[:, row]`.
const HIGHLIGHT_BEST = Highlighter(; foreground=:green) do data, row, col
    col > 1 || return false
    candidates = @view data.results[:, row]
    return maximum(candidates) == data.results[col - 1, row]
end
34
39
# Print a `BenchmarkResult` to `io` through `pretty_table`, passing `br.sizedresults` as
# the data and `br.tests` as the third positional argument (presumably the column
# header; confirm against the PrettyTables version in use). Cropping is disabled and
# `HIGHLIGHT_BEST` is installed as the only highlighter.
function Base.show(io::IO, br::BenchmarkResult)
    return pretty_table(
        io,
        br.sizedresults,
        br.tests;
        crop=:none,
        highlighters=(HIGHLIGHT_BEST,),
    )
end
37
44
38
45
using VegaLite, IndexedTables
@@ -57,22 +64,17 @@ function plot(br::BenchmarkResult)
57
64
)
58
65
end
59
66
60
- function alloc_matrices (s:: NTuple{3,Int} )
61
- M, K, N = s
62
- C = Matrix {Float64} (undef, M, N)
63
- A = rand (M, K)
64
- B = rand (K, N)
65
- C, A, B
66
- end
67
- alloc_matrices (s:: Int ) = alloc_matrices ((s,s,s))
68
- gflop (s:: Int ) = s^ 3 * 2e-9
69
- gflop (s:: NTuple{3,Int} ) = prod (s) * 2e-9
67
"""
    tothreetuple(i)

Normalise a problem size to a 3-tuple.

An integer `i` is expanded to `(i, i, i)`; a tuple of three integers is returned
unchanged. Any `Integer` subtype is accepted, so sizes given as e.g. `Int32` or `UInt`
work in addition to `Int`.
"""
tothreetuple(i::Integer) = (i, i, i)
tothreetuple(i::NTuple{3,Integer}) = i
70
69
function benchmark_gemm (sizes)
71
70
tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFort-loops" , " GFort-intrinsic" , " LoopVectorization" ]
72
71
br = BenchmarkResult (tests, sizes)
73
72
for (i,s) ∈ enumerate (sizes)
74
- C, A, B = alloc_matrices (s)
75
- n_gflop = gflop (s)
73
+ M, K, N = tothreetuple (s)
74
+ C = Matrix {Float64} (undef, M, N)
75
+ A = rand (M, K)
76
+ B = rand (K, N)
77
+ n_gflop = M* K* N* 2e-9
76
78
br[1 ,i] = n_gflop / @belapsed mul! ($ C, $ A, $ B)
77
79
Cblas = copy (C)
78
80
br[2 ,i] = n_gflop / @belapsed jgemm_nkm! ($ C, $ A, $ B)
@@ -85,9 +87,42 @@ function benchmark_gemm(sizes)
85
87
@assert C ≈ Cblas " Fort intrinsic gemm wrong?"
86
88
br[6 ,i] = n_gflop / @belapsed gemmavx! ($ C, $ A, $ B)
87
89
@assert C ≈ Cblas " LoopVec gemm wrong?"
90
+ if i % 10 == 0
91
+ percent_complete = round (100 i/ length (sizes), sigdigits = 4 )
92
+ @show percent_complete
93
+ end
94
+ end
95
+ br
96
+ end
97
"""
    benchmark_AtmulB(sizes)

Time six implementations of the product `C = At' * B` at every entry of `sizes`.

Each size `s` is expanded with `tothreetuple(s)` into `(M, K, N)`; `At` is a random
`K × M` matrix, `B` a random `K × N` matrix and `C` an uninitialised `M × N` output.
Entry `[k, i]` of the returned `BenchmarkResult` is `M * K * N * 2e-9` divided by the
`@belapsed` time of implementation `k` at the `i`-th size, where `k` follows the order
of the labels in `tests`. After each kernel other than `mul!`, `C` is asserted to be
`≈` the result that `mul!` left behind. `percent_complete` is shown after every tenth
size.
"""
function benchmark_AtmulB(sizes)
    blas_label = BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS"
    tests = [
        blas_label,
        "Julia",
        "Clang-Polly",
        "GFort-loops",
        "GFort-intrinsic",
        "LoopVectorization",
    ]
    result = BenchmarkResult(tests, sizes)
    for (idx, sz) in enumerate(sizes)
        M, K, N = tothreetuple(sz)
        C = Matrix{Float64}(undef, M, N)
        At = rand(K, M)
        B = rand(K, N)
        n_gflop = M * K * N * 2e-9

        # Reference run: its output is the baseline every other kernel is checked against.
        t_ref = @belapsed mul!($C, $At', $B)
        result[1, idx] = n_gflop / t_ref
        Cblas = copy(C)

        t_julia = @belapsed jAtmulB!($C, $At, $B)
        result[2, idx] = n_gflop / t_julia
        @assert C ≈ Cblas "Julia gemm wrong?"

        t_polly = @belapsed cAtmulB!($C, $At, $B)
        result[3, idx] = n_gflop / t_polly
        @assert C ≈ Cblas "Polly gemm wrong?"

        t_fort = @belapsed fAtmulB!($C, $At, $B)
        result[4, idx] = n_gflop / t_fort
        @assert C ≈ Cblas "Fort gemm wrong?"

        t_intrinsic = @belapsed fAtmulB_builtin!($C, $At, $B)
        result[5, idx] = n_gflop / t_intrinsic
        @assert C ≈ Cblas "Fort intrinsic gemm wrong?"

        t_avx = @belapsed jAtmulBavx!($C, $At, $B)
        result[6, idx] = n_gflop / t_avx
        @assert C ≈ Cblas "LoopVec gemm wrong?"

        # Progress report; `@show` prints the variable name, so it must stay as is.
        if iszero(idx % 10)
            percent_complete = round(100 * idx / length(sizes); sigdigits=4)
            @show percent_complete
        end
    end
    return result
end
125
+
91
126
function benchmark_dot (sizes)
92
127
tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFort-loops" , " LoopVectorization" ]
93
128
br = BenchmarkResult (tests, sizes)
@@ -104,6 +139,10 @@ function benchmark_dot(sizes)
104
139
@assert fdot (a,b) ≈ dotblas " Fort dot wrong?"
105
140
br[5 ,i] = n_gflop / @belapsed jdotavx ($ a, $ b)
106
141
@assert jdotavx (a,b) ≈ dotblas " LoopVec dot wrong?"
142
+ if i % 10 == 0
143
+ percent_complete = round (100 i/ length (sizes), sigdigits = 4 )
144
+ @show percent_complete
145
+ end
107
146
end
108
147
br
109
148
end
@@ -123,11 +162,63 @@ function benchmark_selfdot(sizes)
123
162
@assert fselfdot (a) ≈ dotblas " Fort dot wrong?"
124
163
br[5 ,i] = n_gflop / @belapsed jselfdotavx ($ a)
125
164
@assert jselfdotavx (a) ≈ dotblas " LoopVec dot wrong?"
165
+ if i % 10 == 0
166
+ percent_complete = round (100 i/ length (sizes), sigdigits = 4 )
167
+ @show percent_complete
168
+ end
126
169
end
127
170
br
128
171
end
129
172
"""
    totwotuple(i)

Normalise a problem size to a 2-tuple.

An integer `i` is expanded to `(i, i)`; a tuple of two integers is returned unchanged.
Any `Integer` subtype is accepted, so sizes given as e.g. `Int32` or `UInt` work in
addition to `Int`.
"""
totwotuple(i::Integer) = (i, i)
totwotuple(i::Tuple{Integer,Integer}) = i
174
"""
    benchmark_gemv(sizes)

Time five implementations of the matrix-vector product `x = A * y` at every entry of
`sizes`.

Each size `s` is expanded with `totwotuple(s)` into `(M, N)`; `A` is a random `M × N`
matrix, `y` a random length-`N` vector and `x` an uninitialised length-`M` output.
Entry `[k, i]` of the returned `BenchmarkResult` is `M * N * 2e-9` divided by the
`@belapsed` time of implementation `k` at the `i`-th size, where `k` follows the order
of the labels in `tests`. After each kernel other than `mul!`, `x` is asserted to be
`≈` the result that `mul!` left behind. `percent_complete` is shown after every tenth
size.
"""
function benchmark_gemv(sizes)
    blas_label = BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS"
    tests = [blas_label, "Julia", "Clang-Polly", "GFort-loops", "LoopVectorization"]
    result = BenchmarkResult(tests, sizes)
    for (idx, sz) in enumerate(sizes)
        M, N = totwotuple(sz)
        x = Vector{Float64}(undef, M)
        A = rand(M, N)
        y = rand(N)
        n_gflop = M * N * 2e-9

        # Reference run: its output is the baseline every other kernel is checked against.
        t_ref = @belapsed mul!($x, $A, $y)
        result[1, idx] = n_gflop / t_ref
        xblas = copy(x)

        t_julia = @belapsed jgemv!($x, $A, $y)
        result[2, idx] = n_gflop / t_julia
        @assert x ≈ xblas "Julia wrong?"

        t_polly = @belapsed cgemv!($x, $A, $y)
        result[3, idx] = n_gflop / t_polly
        @assert x ≈ xblas "Polly wrong?"

        t_fort = @belapsed fgemv!($x, $A, $y)
        result[4, idx] = n_gflop / t_fort
        @assert x ≈ xblas "Fort wrong?"

        t_avx = @belapsed jgemvavx!($x, $A, $y)
        result[5, idx] = n_gflop / t_avx
        @assert x ≈ xblas "LoopVec wrong?"

        # Progress report; `@show` prints the variable name, so it must stay as is.
        if iszero(idx % 10)
            percent_complete = round(100 * idx / length(sizes); sigdigits=4)
            @show percent_complete
        end
    end
    return result
end
198
"""
    benchmark_dot3(sizes)

Time five implementations of the three-argument dot product `dot(x, A, y)` at every
entry of `sizes`.

Each size `s` is expanded with `totwotuple(s)` into `(M, N)`; `x` is a random length-`M`
vector, `A` a random `M × N` matrix and `y` a random length-`N` vector. Entry `[k, i]`
of the returned `BenchmarkResult` is `M * N * 3e-9` divided by the `@belapsed` time of
implementation `k` at the `i`-th size, where `k` follows the order of the labels in
`tests`. Every other kernel is called once more outside the timing and its value is
asserted to be `≈` `dot(x, A, y)`. `percent_complete` is shown after every tenth size.
"""
function benchmark_dot3(sizes)
    blas_label = BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS"
    tests = [blas_label, "Julia", "Clang-Polly", "GFort-loops", "LoopVectorization"]
    result = BenchmarkResult(tests, sizes)
    for (idx, sz) in enumerate(sizes)
        M, N = totwotuple(sz)
        x = rand(M)
        A = rand(M, N)
        y = rand(N)
        n_gflop = M * N * 3e-9

        # Reference run and reference value for the correctness checks below.
        t_ref = @belapsed dot($x, $A, $y)
        result[1, idx] = n_gflop / t_ref
        dotblas = dot(x, A, y)

        t_julia = @belapsed jdot3($x, $A, $y)
        result[2, idx] = n_gflop / t_julia
        @assert jdot3(x, A, y) ≈ dotblas "Julia dot wrong?"

        t_polly = @belapsed cdot3($x, $A, $y)
        result[3, idx] = n_gflop / t_polly
        @assert cdot3(x, A, y) ≈ dotblas "Polly dot wrong?"

        t_fort = @belapsed fdot3($x, $A, $y)
        result[4, idx] = n_gflop / t_fort
        @assert fdot3(x, A, y) ≈ dotblas "Fort dot wrong?"

        t_avx = @belapsed jdot3avx($x, $A, $y)
        result[5, idx] = n_gflop / t_avx
        @assert jdot3avx(x, A, y) ≈ dotblas "LoopVec dot wrong?"

        # Progress report; `@show` prints the variable name, so it must stay as is.
        if iszero(idx % 10)
            percent_complete = round(100 * idx / length(sizes); sigdigits=4)
            @show percent_complete
        end
    end
    return result
end
131
222
function sse! (Xβ, y, X, β)
132
223
mul! (copyto! (Xβ, y), X, β, 1.0 , - 1.0 )
133
224
dot (Xβ, Xβ)
@@ -151,22 +242,32 @@ function benchmark_sse(sizes)
151
242
@assert fOLSlp (y, X, β) ≈ lpblas " Fort wrong?"
152
243
br[5 ,i] = n_gflop / @belapsed jOLSlp_avx ($ y, $ X, $ β)
153
244
@assert jOLSlp_avx (y, X, β) ≈ lpblas " LoopVec wrong?"
245
+ if i % 10 == 0
246
+ percent_complete = round (100 i/ length (sizes), sigdigits = 4 )
247
+ @show percent_complete
248
+ end
154
249
end
155
250
br
156
251
end
157
252
158
253
"""
    benchmark_exp(sizes)

Time four implementations of the elementwise exponential `b .= exp.(a)` at every entry
of `sizes`.

Each size `s` is used as the length of a random input vector `a`; `b` is a `similar`
output buffer. Entry `[k, i]` of the returned `BenchmarkResult` is `1e-9 * s` divided by
the `@belapsed` time of implementation `k` at the `i`-th size, where `k` follows the
order of the labels in `tests`. That figure counts elements, not floating-point
operations. After each kernel other than the plain broadcast, `b` is asserted to be `≈`
the broadcast's output. `percent_complete` is shown after every tenth size.
"""
function benchmark_exp(sizes)
    tests = ["Julia", "Clang-Polly", "GFort-loops", "LoopVectorization"]
    result = BenchmarkResult(tests, sizes)
    for (idx, len) in enumerate(sizes)
        a = rand(len)
        b = similar(a)
        n_gflop = 1e-9 * len # not really gflops

        # Plain broadcast is the baseline every other kernel is checked against.
        t_base = @belapsed @. $b = exp($a)
        result[1, idx] = n_gflop / t_base
        baseb = copy(b)

        t_polly = @belapsed cvexp!($b, $a)
        result[2, idx] = n_gflop / t_polly
        @assert b ≈ baseb "Clang wrong?"

        t_fort = @belapsed fvexp!($b, $a)
        result[3, idx] = n_gflop / t_fort
        @assert b ≈ baseb "Fort wrong?"

        t_avx = @belapsed @avx @. $b = exp($a)
        result[4, idx] = n_gflop / t_avx
        @assert b ≈ baseb "LoopVec wrong?"

        # Progress report; `@show` prints the variable name, so it must stay as is.
        if iszero(idx % 10)
            percent_complete = round(100 * idx / length(sizes); sigdigits=4)
            @show percent_complete
        end
    end
    return result
end
@@ -187,6 +288,10 @@ function benchmark_aplusBc(sizes)
187
288
@assert D ≈ Dcopy " Fort wrong?"
188
289
br[4 ,i] = n_gflop / @belapsed @avx @. $ D = $ a + $ B * $ c′
189
290
@assert D ≈ Dcopy " LoopVec wrong?"
291
+ if i % 10 == 0
292
+ percent_complete = round (100 i/ length (sizes), sigdigits = 4 )
293
+ @show percent_complete
294
+ end
190
295
end
191
296
br
192
297
end
0 commit comments