@@ -25,7 +25,15 @@ function Base.getindex(br::SizedResults, row, col)
25
25
col == 1 ? string (br. sizes[row]) : string (br. results[col - 1 , row])
26
26
end
27
27
# Forward indexed assignment on a `BenchmarkResult` to the results matrix held
# in its `sizedresults` field; the value of the assignment expression is returned.
function Base.setindex!(br::BenchmarkResult, v, i...)
    return br.sizedresults.results[i...] = v
end
28
-
28
"""
    vcat(br1::BenchmarkResult, br2::BenchmarkResult)

Combine two benchmark results into a new `BenchmarkResult`.

The result matrices are concatenated with `hcat` (wrapped in a `SharedMatrix`)
and the size vectors with `vcat`. Only `br1.tests` is carried over; `br2.tests`
is not consulted.
"""
function Base.vcat(br1::BenchmarkResult, br2::BenchmarkResult)
    first_sr = br1.sizedresults
    second_sr = br2.sizedresults
    merged_results = SharedMatrix(hcat(first_sr.results, second_sr.results))
    merged_sizes = vcat(first_sr.sizes, second_sr.sizes)
    return BenchmarkResult(br1.tests, SizedResults(merged_results, merged_sizes))
end
29
37
30
38
"""
    tothreetuple(i::Int)
    tothreetuple(t::NTuple{3,Int})

Normalize the argument to a 3-tuple of `Int`s: a single `Int` is repeated three
times, while an existing 3-tuple is returned unchanged.
"""
tothreetuple(i::Int) = ntuple(_ -> i, Val(3))
tothreetuple(t::NTuple{3,Int}) = t
@@ -52,9 +60,10 @@ function matmul_bench!(br, C, A, B, i)
52
60
@assert C ≈ Cblas " eigen gemm wrong?" ; fill! (C, NaN )
53
61
br[10 ,i] = n_gflop / @belapsed iegemm! ($ C, $ A, $ B)
54
62
@assert C ≈ Cblas " i-eigen gemm wrong?" ; fill! (C, NaN )
55
- br[11 ,i] = n_gflop / @belapsed dgemmjit! ($ C, $ A, $ B)
56
- @assert C ≈ Cblas " MKL JIT gemm wrong?" ; fill! (C, NaN )
57
- br[12 ,i] = n_gflop / @belapsed gemmavx! ($ C, $ A, $ B)
63
+ # br[11,i] = n_gflop / @belapsed dgemmjit!($C, $A, $B)
64
+ # @assert C ≈ Cblas "MKL JIT gemm wrong?"; fill!(C, NaN)
65
+ # br[12,i] = n_gflop / @belapsed gemmavx!($C, $A, $B)
66
+ br[end ,i] = n_gflop / @belapsed gemmavx! ($ C, $ A, $ B)
58
67
@assert C ≈ Cblas " LoopVec gemm wrong?"
59
68
end
60
69
function A_mul_B_bench! (br, s, i)
@@ -93,35 +102,36 @@ function At_mul_Bt_bench!(br, s, i)
93
102
matmul_bench! (br, C, A, B, i)
94
103
end
95
104
96
"""
    blastests()

Return the vector of test labels used for the BLAS-style benchmarks. The first
label names the BLAS library reported by `BLAS.vendor()`; the last label is
`"LoopVectorization"`.
"""
function blastests()
    # Label the reference implementation after the BLAS vendor reported by Julia.
    blas_label = BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS"
    return [
        blas_label,
        "Julia", "Clang-Polly",
        "GFortran", "GFort-intrinsic",
        "icc", "ifort", "ifort-intrinsic",
        "g++ & Eigen-3", "icpc & Eigen-3",
        # The "MKL JIT" entry that used to precede this one is currently disabled.
        "LoopVectorization"
    ]
end
104
114
105
115
"""
    benchmark_AmulB(sizes)

Create a `BenchmarkResult` labelled with `blastests()` for the given `sizes`,
run `A_mul_B_bench!` once per `(index, size)` pair through `pmap`, and return
the `BenchmarkResult`.
"""
function benchmark_AmulB(sizes)
    result = BenchmarkResult(blastests(), sizes)
    # The bench functions write into the raw results matrix, not the wrapper.
    # NOTE(review): presumably a shared array so worker writes are visible — confirm.
    results = result.sizedresults.results
    pmap(enumerate(sizes)) do (idx, sz)
        A_mul_B_bench!(results, sz, idx)
    end
    return result
end
111
121
"""
    benchmark_AmulBt(sizes)

Create a `BenchmarkResult` labelled with `blastests()` for the given `sizes`,
run `A_mul_Bt_bench!` once per `(index, size)` pair through `pmap`, and return
the `BenchmarkResult`.
"""
function benchmark_AmulBt(sizes)
    result = BenchmarkResult(blastests(), sizes)
    # The bench functions write into the raw results matrix, not the wrapper.
    results = result.sizedresults.results
    pmap(enumerate(sizes)) do (idx, sz)
        A_mul_Bt_bench!(results, sz, idx)
    end
    return result
end
117
127
"""
    benchmark_AtmulB(sizes)

Create a `BenchmarkResult` labelled with `blastests()` for the given `sizes`,
run `At_mul_B_bench!` once per `(index, size)` pair through `pmap`, and return
the `BenchmarkResult`.
"""
function benchmark_AtmulB(sizes)
    result = BenchmarkResult(blastests(), sizes)
    # The bench functions write into the raw results matrix, not the wrapper.
    results = result.sizedresults.results
    pmap(enumerate(sizes)) do (idx, sz)
        At_mul_B_bench!(results, sz, idx)
    end
    return result
end
123
133
function benchmark_AtmulBt (sizes)
124
- br = BenchmarkResult (BLASTESTS , sizes)
134
+ br = BenchmarkResult (blastests () , sizes)
125
135
sm = br. sizedresults. results
126
136
pmap (is -> At_mul_Bt_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
127
137
br
@@ -150,7 +160,7 @@ function dot_bench!(br, s, i)
150
160
@assert jdotavx (a,b) ≈ dotblas " LoopVec dot wrong?"
151
161
end
152
162
function benchmark_dot (sizes)
153
- tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran" , " icc" , " ifort" , " Clang ++ & Eigen-3" , " icpc & Eigen-3" , " LoopVectorization" ]
163
+ tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran" , " icc" , " ifort" , " g ++ & Eigen-3" , " icpc & Eigen-3" , " LoopVectorization" ]
154
164
br = BenchmarkResult (tests, sizes)
155
165
sm = br. sizedresults. results
156
166
pmap (is -> dot_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
@@ -179,7 +189,7 @@ function selfdot_bench!(br, s, i)
179
189
@assert jselfdotavx (a) ≈ dotblas " LoopVec dot wrong?"
180
190
end
181
191
function benchmark_selfdot (sizes)
182
- tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran" , " icc" , " ifort" , " Clang ++ & Eigen-3" , " icpc & Eigen-3" , " LoopVectorization" ]
192
+ tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran" , " icc" , " ifort" , " g ++ & Eigen-3" , " icpc & Eigen-3" , " LoopVectorization" ]
183
193
br = BenchmarkResult (tests, sizes)
184
194
sm = br. sizedresults. results
185
195
pmap (is -> selfdot_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
@@ -211,9 +221,9 @@ function gemv_bench!(br, x, A, y, i)
211
221
@assert x ≈ xblas " eigen wrong?" ; fill! (x, NaN );
212
222
br[10 ,i] = n_gflop / @belapsed iegemv! ($ x, $ A, $ y)
213
223
@assert x ≈ xblas " i-eigen wrong?" ; fill! (x, NaN );
214
- br[11 ,i] = n_gflop / @belapsed dgemmjit! ($ x, $ A, $ y)
215
- @assert x ≈ xblas " gemmjit wrong?" ; fill! (x, NaN );
216
- br[12 ,i] = n_gflop / @belapsed jgemvavx! ($ x, $ A, $ y)
224
+ # br[11,i] = n_gflop / @belapsed dgemmjit!($x, $A, $y)
225
+ # @assert x ≈ xblas "gemmjit wrong?"; fill!(x, NaN);
226
+ br[end ,i] = n_gflop / @belapsed jgemvavx! ($ x, $ A, $ y)
217
227
@assert x ≈ xblas " LoopVec wrong?"
218
228
end
219
229
function A_mul_vb_bench! (br, s, i)
@@ -231,13 +241,13 @@ function At_mul_vb_bench!(br, s, i)
231
241
gemv_bench! (br, x, A, y, i)
232
242
end
233
243
"""
    benchmark_Amulvb(sizes)

Create a `BenchmarkResult` labelled with `blastests()` for the given `sizes`,
run `A_mul_vb_bench!` once per `(index, size)` pair through `pmap`, and return
the `BenchmarkResult`.
"""
function benchmark_Amulvb(sizes)
    result = BenchmarkResult(blastests(), sizes)
    # The bench functions write into the raw results matrix, not the wrapper.
    results = result.sizedresults.results
    pmap(enumerate(sizes)) do (idx, sz)
        A_mul_vb_bench!(results, sz, idx)
    end
    return result
end
239
249
function benchmark_Atmulvb (sizes)
240
- br = BenchmarkResult (BLASTESTS , sizes)
250
+ br = BenchmarkResult (blastests () , sizes)
241
251
sm = br. sizedresults. results
242
252
pmap (is -> At_mul_vb_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
243
253
br
@@ -267,7 +277,7 @@ function dot3_bench!(br, s, i)
267
277
@assert jdot3avx (x, A, y) ≈ dotblas " LoopVec dot wrong?"
268
278
end
269
279
function benchmark_dot3 (sizes)
270
- tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran" , " icc" , " ifort" , " Clang ++ & Eigen-3" , " icpc & Eigen-3" , " LoopVectorization" ]
280
+ tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran" , " icc" , " ifort" , " g ++ & Eigen-3" , " icpc & Eigen-3" , " LoopVectorization" ]
271
281
br = BenchmarkResult (tests, sizes)
272
282
sm = br. sizedresults. results
273
283
pmap (is -> dot3_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
@@ -306,7 +316,7 @@ function sse_bench!(br, s, i)
306
316
@assert jOLSlp_avx (y, X, β) ≈ lpblas " LoopVec wrong?"
307
317
end
308
318
function benchmark_sse (sizes)
309
- tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran" , " icc" , " ifort" , " Clang ++ & Eigen-3" , " icpc & Eigen-3" , " LoopVectorization" ]
319
+ tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran" , " icc" , " ifort" , " g ++ & Eigen-3" , " icpc & Eigen-3" , " LoopVectorization" ]
310
320
br = BenchmarkResult (tests, sizes)
311
321
sm = br. sizedresults. results
312
322
pmap (is -> sse_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
@@ -360,7 +370,7 @@ function aplusBc_bench!(br, s, i)
360
370
@assert D ≈ Dcopy " LoopVec wrong?"
361
371
end
362
372
function benchmark_aplusBc (sizes)
363
- tests = [" Julia" , " Clang-Polly" , " GFortran" , " icc" , " ifort" , " Clang ++ & Eigen-3" , " icpc & Eigen-3" , " LoopVectorization" ]
373
+ tests = [" Julia" , " Clang-Polly" , " GFortran" , " icc" , " ifort" , " g ++ & Eigen-3" , " icpc & Eigen-3" , " LoopVectorization" ]
364
374
br = BenchmarkResult (tests, sizes)
365
375
sm = br. sizedresults. results
366
376
pmap (is -> aplusBc_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
@@ -392,7 +402,7 @@ function AplusAt_bench!(br, s, i)
392
402
@assert B ≈ baseB " LoopVec wrong?"
393
403
end
394
404
function benchmark_AplusAt (sizes)
395
- tests = [" Julia" , " Clang-Polly" , " GFortran" , " GFortran-builtin" , " icc" , " ifort" , " ifort-builtin" , " Clang ++ & Eigen-3" , " icpc & Eigen-3" , " LoopVectorization" ]
405
+ tests = [" Julia" , " Clang-Polly" , " GFortran" , " GFortran-builtin" , " icc" , " ifort" , " ifort-builtin" , " g ++ & Eigen-3" , " icpc & Eigen-3" , " LoopVectorization" ]
396
406
br = BenchmarkResult (tests, sizes)
397
407
sm = br. sizedresults. results
398
408
pmap (is -> AplusAt_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
@@ -453,3 +463,65 @@ function benchmark_logdettriangle(sizes)
453
463
br
454
464
end
455
465
466
+
467
"""
    filter2d_bench_run!(br, s, i, K)

Time six implementations of the 2-d filter on an `s × s` output with kernel `K`
and store each throughput (`n_gflop / @belapsed ...`) in `br[1:6, i]`.

The output of `filter2d!` (row 1) is taken as the reference. Every other
implementation's output is compared against it with `≈`, and an `AssertionError`
is raised on mismatch.

# Arguments
- `br`: indexable results container written at `br[k, i]` for `k = 1:6`.
- `s`: side length of the output array.
- `i`: column of `br` to fill.
- `K`: filter kernel; `size(K)` is used for the operation count.
"""
function filter2d_bench_run!(br, s, i, K)
    # NOTE(review): the +2 padding and the offset of 1 presumably assume a 3×3
    # kernel with axes -1:1 — confirm before using other kernel sizes.
    A = rand(s + 2, s + 2)
    B = OffsetArray(similar(A, (s, s)), 1, 1)
    Mk, Nk = size(K)
    n_gflop = 1e-9 * (2Mk * Nk - 1) * s^2
    br[1, i] = n_gflop / @belapsed filter2d!($B, $A, $K)
    # Keep the reference output, then poison B so that an implementation which
    # fails to write its result cannot pass the comparison on stale data.
    Bcopy = copy(B); fill!(B, NaN)
    br[2, i] = n_gflop / @belapsed cfilter2d!($B, $A, $K)
    @assert B ≈ Bcopy "Clang wrong?"; fill!(B, NaN)
    br[3, i] = n_gflop / @belapsed ffilter2d!($B, $A, $K)
    @assert B ≈ Bcopy "Fort wrong?"; fill!(B, NaN)
    br[4, i] = n_gflop / @belapsed icfilter2d!($B, $A, $K)
    @assert B ≈ Bcopy "icc wrong?"; fill!(B, NaN)
    br[5, i] = n_gflop / @belapsed iffilter2d!($B, $A, $K)
    @assert B ≈ Bcopy "ifort wrong?"; fill!(B, NaN)
    br[6, i] = n_gflop / @belapsed filter2davx!($B, $A, $K)
    @assert B ≈ Bcopy "LoopVec wrong?"
    return nothing
end
485
"""
    benchmark_filter2d(sizes, K)

Create a six-test `BenchmarkResult` for the given `sizes`, run
`filter2d_bench_run!` with kernel `K` once per `(index, size)` pair through
`pmap`, and return the `BenchmarkResult`.
"""
function benchmark_filter2d(sizes, K)
    labels = ["Julia", "Clang-Polly", "GFortran", "icc", "ifort", "LoopVectorization"]
    result = BenchmarkResult(labels, sizes)
    # The bench function writes into the raw results matrix, not the wrapper.
    results = result.sizedresults.results
    pmap(enumerate(sizes)) do (idx, sz)
        filter2d_bench_run!(results, sz, idx, K)
    end
    return result
end
492
+
493
"""
    benchmark_filter2ddynamic(sizes)

Run `benchmark_filter2d` with a random 3×3 `Float64` kernel stored in an
`OffsetArray` whose axes are `-1:1` in both dimensions.
"""
function benchmark_filter2ddynamic(sizes)
    kernel = OffsetArray(rand(Float64, 3, 3), -1:1, -1:1)
    return benchmark_filter2d(sizes, kernel)
end
497
"""
    benchmark_filter2d3x3(sizes)

Run `benchmark_filter2d` with a random 3×3 kernel wrapped in a
`SizedOffsetMatrix{Float64,-1,1,-1,1}`.
"""
function benchmark_filter2d3x3(sizes)
    kernel = SizedOffsetMatrix{Float64,-1,1,-1,1}(rand(3, 3))
    return benchmark_filter2d(sizes, kernel)
end
501
+
502
"""
    filter2dunrolled_bench_run!(br, s, i, K)

Time six implementations of the unrolled 2-d filter on an `s × s` output with
kernel `K` and store each throughput (`n_gflop / @belapsed ...`) in `br[1:6, i]`.

The output of `filter2dunrolled!` (row 1) is taken as the reference. Every
other implementation's output is compared against it with `≈`, and an
`AssertionError` is raised on mismatch.

# Arguments
- `br`: indexable results container written at `br[k, i]` for `k = 1:6`.
- `s`: side length of the output array.
- `i`: column of `br` to fill.
- `K`: filter kernel; `size(K)` is used for the operation count.
"""
function filter2dunrolled_bench_run!(br, s, i, K)
    # NOTE(review): the +2 padding and the offset of 1 presumably assume a 3×3
    # kernel with axes -1:1 — confirm before using other kernel sizes.
    A = rand(s + 2, s + 2)
    B = OffsetArray(similar(A, (s, s)), 1, 1)
    Mk, Nk = size(K)
    n_gflop = 1e-9 * (2Mk * Nk - 1) * s^2
    br[1, i] = n_gflop / @belapsed filter2dunrolled!($B, $A, $K)
    # Keep the reference output, then poison B so that an implementation which
    # fails to write its result cannot pass the comparison on stale data.
    Bcopy = copy(B); fill!(B, NaN)
    br[2, i] = n_gflop / @belapsed cfilter2dunrolled!($B, $A, $K)
    @assert B ≈ Bcopy "Clang wrong?"; fill!(B, NaN)
    br[3, i] = n_gflop / @belapsed ffilter2dunrolled!($B, $A, $K)
    @assert B ≈ Bcopy "Fort wrong?"; fill!(B, NaN)
    br[4, i] = n_gflop / @belapsed icfilter2dunrolled!($B, $A, $K)
    @assert B ≈ Bcopy "icc wrong?"; fill!(B, NaN)
    br[5, i] = n_gflop / @belapsed iffilter2dunrolled!($B, $A, $K)
    @assert B ≈ Bcopy "ifort wrong?"; fill!(B, NaN)
    br[6, i] = n_gflop / @belapsed filter2dunrolledavx!($B, $A, $K)
    @assert B ≈ Bcopy "LoopVec wrong?"
    return nothing
end
520
"""
    benchmark_filter2dunrolled(sizes)

Create a six-test `BenchmarkResult` for the given `sizes`, build a random 3×3
`SizedOffsetMatrix{Float64,-1,1,-1,1}` kernel, run
`filter2dunrolled_bench_run!` once per `(index, size)` pair through `pmap`, and
return the `BenchmarkResult`.
"""
function benchmark_filter2dunrolled(sizes)
    labels = ["Julia", "Clang-Polly", "GFortran", "icc", "ifort", "LoopVectorization"]
    result = BenchmarkResult(labels, sizes)
    # The bench function writes into the raw results matrix, not the wrapper.
    results = result.sizedresults.results
    kernel = SizedOffsetMatrix{Float64,-1,1,-1,1}(rand(3, 3))
    pmap(enumerate(sizes)) do (idx, sz)
        filter2dunrolled_bench_run!(results, sz, idx, kernel)
    end
    return result
end
0 commit comments