Skip to content

Commit e043283

Browse files
committed
Improvements to the benchmark script: the Intel compilers are no longer required, so the benchmarks can run cross-platform. Also improves cost modeling for noncontiguous ops on short vectors.
1 parent 23cb167 commit e043283

File tree

7 files changed

+458
-258
lines changed

7 files changed

+458
-258
lines changed

benchmark/benchmarkflops.jl

Lines changed: 118 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -49,22 +49,29 @@ function matmul_bench!(br, C, A, B, i)
4949
@assert C ≈ Cblas "Clang gemm wrong?"; fill!(C, NaN)
5050
br[4,i] = n_gflop / @belapsed fgemm!($C, $A, $B)
5151
@assert C ≈ Cblas "Fort gemm wrong?"; fill!(C, NaN)
52-
br[5,i] = n_gflop / @belapsed icgemm!($C, $A, $B)
53-
@assert C ≈ Cblas "icc gemm wrong?"; fill!(C, NaN)
54-
br[6,i] = n_gflop / @belapsed ifgemm!($C, $A, $B)
55-
@assert C ≈ Cblas "ifort gemm wrong?"; fill!(C, NaN)
56-
br[7,i] = n_gflop / @belapsed egemm!($C, $A, $B)
52+
if INTEL_BENCH
53+
br[5,i] = n_gflop / @belapsed icgemm!($C, $A, $B)
54+
@assert C ≈ Cblas "icc gemm wrong?"; fill!(C, NaN)
55+
br[6,i] = n_gflop / @belapsed ifgemm!($C, $A, $B)
56+
@assert C ≈ Cblas "ifort gemm wrong?"; fill!(C, NaN)
57+
end
58+
br[5+2INTEL_BENCH,i] = n_gflop / @belapsed egemm!($C, $A, $B)
5759
@assert C ≈ Cblas "eigen gemm wrong?"; fill!(C, NaN)
58-
br[8,i] = n_gflop / @belapsed iegemm!($C, $A, $B)
60+
br[6+2INTEL_BENCH,i] = n_gflop / @belapsed iegemm!($C, $A, $B)
5961
@assert C ≈ Cblas "i-eigen gemm wrong?"; fill!(C, NaN)
60-
br[9,i] = n_gflop / @belapsed fgemm_builtin!($C, $A, $B)
62+
63+
br[7+2INTEL_BENCH,i] = n_gflop / @belapsed fgemm_builtin!($C, $A, $B)
6164
@assert C ≈ Cblas "Fort builtin gemm wrong?"; fill!(C, NaN)
62-
br[10,i] = n_gflop / @belapsed ifgemm_builtin!($C, $A, $B)
63-
@assert C ≈ Cblas "ifort builtin gemm wrong?"; fill!(C, NaN)
64-
br[11,i] = n_gflop / @belapsed gemmopenblas!($C, $A, $B);
65+
if INTEL_BENCH
66+
br[8+2INTEL_BENCH,i] = n_gflop / @belapsed ifgemm_builtin!($C, $A, $B)
67+
@assert C ≈ Cblas "ifort builtin gemm wrong?"; fill!(C, NaN)
68+
end
69+
br[8+3INTEL_BENCH,i] = n_gflop / @belapsed gemmopenblas!($C, $A, $B);
6570
@assert C ≈ Cblas "OpenBLAS gemm wrong?"
66-
br[12,i] = n_gflop / @belapsed gemmmkl!($C, $A, $B)
67-
@assert C ≈ Cblas "MKL gemm wrong?"
71+
if MKL_BENCH
72+
br[9+3INTEL_BENCH,i] = n_gflop / @belapsed gemmmkl!($C, $A, $B)
73+
@assert C ≈ Cblas "MKL gemm wrong?"
74+
end
6875
# br[12,i] = n_gflop / @belapsed gemmavx!($C, $A, $B)
6976
end
7077
function A_mul_B_bench!(br, s, i)
@@ -116,13 +123,15 @@ function dot_bench!(br, s, i)
116123
@assert cdot(a,b) ≈ dotblas "Clang dot wrong?"
117124
br[4,i] = n_gflop / @belapsed fdot($a, $b)
118125
@assert fdot(a,b) ≈ dotblas "Fort dot wrong?"
119-
br[5,i] = n_gflop / @belapsed icdot($a, $b)
120-
@assert icdot(a,b) ≈ dotblas "icc dot wrong?"
121-
br[6,i] = n_gflop / @belapsed ifdot($a, $b)
122-
@assert ifdot(a,b) ≈ dotblas "ifort dot wrong?"
123-
br[7,i] = n_gflop / @belapsed edot($a, $b)
126+
if INTEL_BENCH
127+
br[5,i] = n_gflop / @belapsed icdot($a, $b)
128+
@assert icdot(a,b) ≈ dotblas "icc dot wrong?"
129+
br[6,i] = n_gflop / @belapsed ifdot($a, $b)
130+
@assert ifdot(a,b) ≈ dotblas "ifort dot wrong?"
131+
end
132+
br[5+2INTEL_BENCH,i] = n_gflop / @belapsed edot($a, $b)
124133
@assert edot(a,b) ≈ dotblas "eigen dot wrong?"
125-
br[8,i] = n_gflop / @belapsed iedot($a, $b)
134+
br[6+2INTEL_BENCH,i] = n_gflop / @belapsed iedot($a, $b)
126135
@assert iedot(a,b) ≈ dotblas "i-eigen dot wrong?"
127136
# br[9,i] = n_gflop / @belapsed dot($a, $b)
128137
end
@@ -138,13 +147,15 @@ function selfdot_bench!(br, s, i)
138147
@assert cselfdot(a) ≈ dotblas "Clang dot wrong?"
139148
br[4,i] = n_gflop / @belapsed fselfdot($a)
140149
@assert fselfdot(a) ≈ dotblas "Fort dot wrong?"
141-
br[5,i] = n_gflop / @belapsed icselfdot($a)
142-
@assert cselfdot(a) ≈ dotblas "icc dot wrong?"
143-
br[6,i] = n_gflop / @belapsed ifselfdot($a)
144-
@assert fselfdot(a) ≈ dotblas "ifort dot wrong?"
145-
br[7,i] = n_gflop / @belapsed eselfdot($a)
150+
if INTEL_BENCH
151+
br[5,i] = n_gflop / @belapsed icselfdot($a)
152+
@assert cselfdot(a) ≈ dotblas "icc dot wrong?"
153+
br[6,i] = n_gflop / @belapsed ifselfdot($a)
154+
@assert fselfdot(a) ≈ dotblas "ifort dot wrong?"
155+
end
156+
br[5+2INTEL_BENCH,i] = n_gflop / @belapsed eselfdot($a)
146157
@assert eselfdot(a) ≈ dotblas "eigen dot wrong?"
147-
br[8,i] = n_gflop / @belapsed ieselfdot($a)
158+
br[6+2INTEL_BENCH,i] = n_gflop / @belapsed ieselfdot($a)
148159
@assert ieselfdot(a) ≈ dotblas "i-eigen dot wrong?"
149160
# br[9,i] = n_gflop / @belapsed dot($a, $a)
150161
end
@@ -163,21 +174,23 @@ function gemv_bench!(br, x, A, y, i)
163174
@assert x ≈ xblas "Clang wrong?"; fill!(x, NaN);
164175
br[4,i] = n_gflop / @belapsed fgemv!($x, $A, $y)
165176
@assert x ≈ xblas "Fort wrong?"; fill!(x, NaN);
166-
br[5,i] = n_gflop / @belapsed icgemv!($x, $A, $y)
167-
@assert x ≈ xblas "icc wrong?"; fill!(x, NaN);
168-
br[6,i] = n_gflop / @belapsed ifgemv!($x, $A, $y)
169-
@assert x ≈ xblas "ifort wrong?"; fill!(x, NaN);
170-
br[7,i] = n_gflop / @belapsed egemv!($x, $A, $y)
177+
if INTEL_BENCH
178+
br[5,i] = n_gflop / @belapsed icgemv!($x, $A, $y)
179+
@assert x ≈ xblas "icc wrong?"; fill!(x, NaN);
180+
br[6,i] = n_gflop / @belapsed ifgemv!($x, $A, $y)
181+
@assert x ≈ xblas "ifort wrong?"; fill!(x, NaN);
182+
end
183+
br[5+2INTEL_BENCH,i] = n_gflop / @belapsed egemv!($x, $A, $y)
171184
@assert x ≈ xblas "eigen wrong?"; fill!(x, NaN);
172-
br[8,i] = n_gflop / @belapsed iegemv!($x, $A, $y)
185+
br[6+2INTEL_BENCH,i] = n_gflop / @belapsed iegemv!($x, $A, $y)
173186
@assert x ≈ xblas "i-eigen wrong?"; fill!(x, NaN);
174-
br[9,i] = n_gflop / @belapsed fgemv_builtin!($x, $A, $y)
187+
br[7+2INTEL_BENCH,i] = n_gflop / @belapsed fgemv_builtin!($x, $A, $y)
175188
@assert x ≈ xblas "Fort wrong?"; fill!(x, NaN);
176-
br[10,i] = n_gflop / @belapsed ifgemv_builtin!($x, $A, $y)
189+
br[8+2INTEL_BENCH,i] = n_gflop / @belapsed ifgemv_builtin!($x, $A, $y)
177190
@assert x ≈ xblas "ifort wrong?"; fill!(x, NaN);
178-
br[11,i] = n_gflop / @belapsed dgemvopenblas!($x, $A, $y)
191+
br[9+2INTEL_BENCH,i] = n_gflop / @belapsed dgemvopenblas!($x, $A, $y)
179192
@assert x ≈ xblas "gemvopenblas wrong?"; fill!(x, NaN);
180-
br[12,i] = n_gflop / @belapsed dgemvmkl!($x, $A, $y)
193+
br[10+2INTEL_BENCH,i] = n_gflop / @belapsed dgemvmkl!($x, $A, $y)
181194
@assert x ≈ xblas "gemvmkl wrong?"; fill!(x, NaN);
182195
end
183196
function A_mul_vb_bench!(br, s, i)
@@ -208,15 +221,17 @@ function dot3_bench!(br, s, i)
208221
@assert cdot3(x, A, y) ≈ dotblas "Clang dot wrong?"
209222
br[4,i] = n_gflop / @belapsed fdot3($x, $A, $y)
210223
@assert fdot3(x, A, y) ≈ dotblas "Fort dot wrong?"
211-
br[5,i] = n_gflop / @belapsed icdot3($x, $A, $y)
212-
@assert icdot3(x, A, y) ≈ dotblas "icc dot wrong?"
213-
br[6,i] = n_gflop / @belapsed ifdot3($x, $A, $y)
214-
@assert ifdot3(x, A, y) ≈ dotblas "ifort dot wrong?"
215-
br[7,i] = n_gflop / @belapsed edot3($x, $A, $y)
224+
if INTEL_BENCH
225+
br[5,i] = n_gflop / @belapsed icdot3($x, $A, $y)
226+
@assert icdot3(x, A, y) ≈ dotblas "icc dot wrong?"
227+
br[6,i] = n_gflop / @belapsed ifdot3($x, $A, $y)
228+
@assert ifdot3(x, A, y) ≈ dotblas "ifort dot wrong?"
229+
end
230+
br[5+2INTEL_BENCH,i] = n_gflop / @belapsed edot3($x, $A, $y)
216231
@assert edot3(x, A, y) ≈ dotblas "eigen dot wrong?"
217-
br[8,i] = n_gflop / @belapsed iedot3($x, $A, $y)
232+
br[6+2INTEL_BENCH,i] = n_gflop / @belapsed iedot3($x, $A, $y)
218233
@assert iedot3(x, A, y) ≈ dotblas "c-eigen dot wrong?"
219-
br[9,i] = n_gflop / @belapsed dot($x, $A, $y)
234+
br[7+2INTEL_BENCH,i] = n_gflop / @belapsed dot($x, $A, $y)
220235
end
221236
# BLAS.set_num_threads(1)
222237
function sse!(Xβ, y, X, β)
@@ -241,15 +256,20 @@ function sse_bench!(br, s, i)
241256
@assert cOLSlp(y, X, β) ≈ lpblas "Clang wrong?"
242257
br[4,i] = n_gflop / @belapsed fOLSlp($y, $X, $β)
243258
@assert fOLSlp(y, X, β) ≈ lpblas "Fort wrong?"
244-
br[5,i] = n_gflop / @belapsed icOLSlp($y, $X, $β)
245-
@assert icOLSlp(y, X, β) ≈ lpblas "icc wrong?"
246-
br[6,i] = n_gflop / @belapsed ifOLSlp($y, $X, $β)
247-
@assert ifOLSlp(y, X, β) ≈ lpblas "ifort wrong?"
248-
br[7,i] = n_gflop / @belapsed eOLSlp($y, $X, $β)
259+
if INTEL_BENCH
260+
br[5,i] = n_gflop / @belapsed icOLSlp($y, $X, $β)
261+
@assert icOLSlp(y, X, β) ≈ lpblas "icc wrong?"
262+
br[6,i] = n_gflop / @belapsed ifOLSlp($y, $X, $β)
263+
@assert ifOLSlp(y, X, β) ≈ lpblas "ifort wrong?"
264+
end
265+
br[5+2INTEL_BENCH,i] = n_gflop / @belapsed eOLSlp($y, $X, $β)
249266
@assert eOLSlp(y, X, β) ≈ lpblas "eigen wrong?"
250-
br[8,i] = n_gflop / @belapsed ieOLSlp($y, $X, $β)
267+
br[6+2INTEL_BENCH,i] = n_gflop / @belapsed ieOLSlp($y, $X, $β)
251268
@assert ieOLSlp(y, X, β) ≈ lpblas "i-eigen wrong?"
252-
br[9,i] = n_gflop / @belapsed sse!($Xβ, $y, $X, $β)
269+
if MKL_BENCH
270+
br[7+2INTEL_BENCH,i] = n_gflop / @belapsed sse!($Xβ, $y, $X, $β)
271+
@assert sse!(Xβ, y, X, β) ≈ lpblas "MKL wrong?"
272+
end
253273
end
254274

255275
function exp_bench!(br, s, i)
@@ -263,10 +283,12 @@ function exp_bench!(br, s, i)
263283
@assert b ≈ baseb "Clang wrong?"
264284
br[4,i] = n_gflop / @belapsed fvexp!($b, $a)
265285
@assert b ≈ baseb "Fort wrong?"
266-
br[5,i] = n_gflop / @belapsed icvexp!($b, $a)
267-
@assert b ≈ baseb "icc wrong?"
268-
br[6,i] = n_gflop / @belapsed ifvexp!($b, $a)
269-
@assert b ≈ baseb "ifort wrong?"
286+
if INTEL_BENCH
287+
br[5,i] = n_gflop / @belapsed icvexp!($b, $a)
288+
@assert b ≈ baseb "icc wrong?"
289+
br[6,i] = n_gflop / @belapsed ifvexp!($b, $a)
290+
@assert b ≈ baseb "ifort wrong?"
291+
end
270292
end
271293

272294
function aplusBc_bench!(br, s, i)
@@ -282,10 +304,12 @@ function aplusBc_bench!(br, s, i)
282304
@assert D ≈ Dcopy "Clang wrong?"; fill!(D, NaN);
283305
br[4,i] = n_gflop / @belapsed faplusBc!($D, $a, $B, $c)
284306
@assert D ≈ Dcopy "Fort wrong?"; fill!(D, NaN);
285-
br[5,i] = n_gflop / @belapsed icaplusBc!($D, $a, $B, $c)
286-
@assert D ≈ Dcopy "icc wrong?"; fill!(D, NaN);
287-
br[6,i] = n_gflop / @belapsed ifaplusBc!($D, $a, $B, $c)
288-
@assert D ≈ Dcopy "ifort wrong?"; fill!(D, NaN);
307+
if INTEL_BENCH
308+
br[5,i] = n_gflop / @belapsed icaplusBc!($D, $a, $B, $c)
309+
@assert D ≈ Dcopy "icc wrong?"; fill!(D, NaN);
310+
br[6,i] = n_gflop / @belapsed ifaplusBc!($D, $a, $B, $c)
311+
@assert D ≈ Dcopy "ifort wrong?"; fill!(D, NaN);
312+
end
289313
br[7,i] = n_gflop / @belapsed eaplusBc!($D, $a, $B, $c)
290314
@assert D ≈ Dcopy "eigen wrong?"; fill!(D, NaN);
291315
br[8,i] = n_gflop / @belapsed ieaplusBc!($D, $a, $B, $c)
@@ -303,18 +327,22 @@ function AplusAt_bench!(br, s, i)
303327
@assert B ≈ baseB "Clang wrong?"; fill!(B, NaN);
304328
br[4,i] = n_gflop / @belapsed fAplusAt!($B, $A)
305329
@assert B ≈ baseB "Fort wrong?"; fill!(B, NaN);
306-
br[5,i] = n_gflop / @belapsed icAplusAt!($B, $A)
307-
@assert B ≈ baseB "icc wrong?"; fill!(B, NaN);
308-
br[6,i] = n_gflop / @belapsed ifAplusAt!($B, $A)
309-
@assert B ≈ baseB "ifort wrong?"; fill!(B, NaN);
310-
br[7,i] = n_gflop / @belapsed eAplusAt!($B, $A)
330+
if INTEL_BENCH
331+
br[5,i] = n_gflop / @belapsed icAplusAt!($B, $A)
332+
@assert B ≈ baseB "icc wrong?"; fill!(B, NaN);
333+
br[6,i] = n_gflop / @belapsed ifAplusAt!($B, $A)
334+
@assert B ≈ baseB "ifort wrong?"; fill!(B, NaN);
335+
end
336+
br[5+2INTEL_BENCH,i] = n_gflop / @belapsed eAplusAt!($B, $A)
311337
@assert B ≈ baseB "eigen wrong?"; fill!(B, NaN);
312-
br[8,i] = n_gflop / @belapsed ieAplusAt!($B, $A)
338+
br[6+2INTEL_BENCH,i] = n_gflop / @belapsed ieAplusAt!($B, $A)
313339
@assert B ≈ baseB "i-eigen wrong?"; fill!(B, NaN);
314-
br[9,i] = n_gflop / @belapsed fAplusAt_builtin!($B, $A)
340+
br[7+2INTEL_BENCH,i] = n_gflop / @belapsed fAplusAt_builtin!($B, $A)
315341
@assert B ≈ baseB "Fort-builtin wrong?"; fill!(B, NaN);
316-
br[10,i] = n_gflop / @belapsed ifAplusAt_builtin!($B, $A)
317-
@assert B ≈ baseB "ifort-builtin wrong?"; fill!(B, NaN);
342+
if INTEL_BENCH
343+
br[8+2INTEL_BENCH,i] = n_gflop / @belapsed ifAplusAt_builtin!($B, $A)
344+
@assert B ≈ baseB "ifort-builtin wrong?"; fill!(B, NaN);
345+
end
318346
end
319347

320348
function randomaccess_bench!(br, s, i)
@@ -331,10 +359,12 @@ function randomaccess_bench!(br, s, i)
331359
@assert p ≈ crandomaccess(P, basis, coefs) "Clang wrong?"
332360
br[4,i] = n_gflop / @belapsed frandomaccess($P, $basis, $coefs)
333361
@assert p ≈ frandomaccess(P, basis, coefs) "Fort wrong?"
334-
br[5,i] = n_gflop / @belapsed icrandomaccess($P, $basis, $coefs)
335-
@assert p ≈ icrandomaccess(P, basis, coefs) "icc wrong?"
336-
br[6,i] = n_gflop / @belapsed ifrandomaccess($P, $basis, $coefs)
337-
@assert p ≈ ifrandomaccess(P, basis, coefs) "ifort wrong?"
362+
if INTEL_BENCH
363+
br[5,i] = n_gflop / @belapsed icrandomaccess($P, $basis, $coefs)
364+
@assert p ≈ icrandomaccess(P, basis, coefs) "icc wrong?"
365+
br[6,i] = n_gflop / @belapsed ifrandomaccess($P, $basis, $coefs)
366+
@assert p ≈ ifrandomaccess(P, basis, coefs) "ifort wrong?"
367+
end
338368
end
339369

340370
function logdettriangle_bench!(br, s, i)
@@ -350,15 +380,17 @@ function logdettriangle_bench!(br, s, i)
350380
@assert ld ≈ clogdettriangle(U) "Clang wrong?"
351381
br[4,i] = n_gflop / @belapsed flogdettriangle($U)
352382
@assert ld ≈ flogdettriangle(U) "Fort wrong?"
353-
br[5,i] = n_gflop / @belapsed iclogdettriangle($U)
354-
@assert ld ≈ iclogdettriangle(U) "icc wrong?"
355-
br[6,i] = n_gflop / @belapsed iflogdettriangle($U)
356-
@assert ld ≈ iflogdettriangle(U) "ifort wrong?"
383+
if INTEL_BENCH
384+
br[5,i] = n_gflop / @belapsed iclogdettriangle($U)
385+
@assert ld ≈ iclogdettriangle(U) "icc wrong?"
386+
br[6,i] = n_gflop / @belapsed iflogdettriangle($U)
387+
@assert ld ≈ iflogdettriangle(U) "ifort wrong?"
388+
end
357389
# br[7,i] = n_gflop / @belapsed elogdettriangle($U)
358390
# @assert ld ≈ elogdettriangle(U) "eigen wrong?"; fill!(B, NaN);
359391
# br[8,i] = n_gflop / @belapsed ielogdettriangle($U)
360392
# @assert ld ≈ ielogdettriangle(U) "i-eigen wrong?"; fill!(B, NaN);
361-
br[7,i] = n_gflop / @belapsed logdet($U)
393+
br[5+2INTEL_BENCH,i] = n_gflop / @belapsed logdet($U)
362394
end
363395

364396

@@ -375,10 +407,12 @@ function filter2d_bench_run!(br, s, i, K)
375407
@assert B ≈ Bcopy "Clang wrong?"
376408
br[4,i] = n_gflop / @belapsed ffilter2d!($B, $A, $K)
377409
@assert B ≈ Bcopy "Fort wrong?"
378-
br[5,i] = n_gflop / @belapsed icfilter2d!($B, $A, $K)
379-
@assert B ≈ Bcopy "icc wrong?"
380-
br[6,i] = n_gflop / @belapsed iffilter2d!($B, $A, $K)
381-
@assert B ≈ Bcopy "ifort wrong?"
410+
if INTEL_BENCH
411+
br[5,i] = n_gflop / @belapsed icfilter2d!($B, $A, $K)
412+
@assert B ≈ Bcopy "icc wrong?"
413+
br[6,i] = n_gflop / @belapsed iffilter2d!($B, $A, $K)
414+
@assert B ≈ Bcopy "ifort wrong?"
415+
end
382416
end
383417

384418

@@ -395,8 +429,10 @@ function filter2dunrolled_bench_run!(br, s, i, K)
395429
@assert B ≈ Bcopy "Clang wrong?"
396430
br[4,i] = n_gflop / @belapsed ffilter2dunrolled!($B, $A, $K)
397431
@assert B ≈ Bcopy "Fort wrong?"
398-
br[5,i] = n_gflop / @belapsed icfilter2dunrolled!($B, $A, $K)
399-
@assert B ≈ Bcopy "icc wrong?"
400-
br[6,i] = n_gflop / @belapsed iffilter2dunrolled!($B, $A, $K)
401-
@assert B ≈ Bcopy "ifort wrong?"
432+
if INTEL_BENCH
433+
br[5,i] = n_gflop / @belapsed icfilter2dunrolled!($B, $A, $K)
434+
@assert B ≈ Bcopy "icc wrong?"
435+
br[6,i] = n_gflop / @belapsed iffilter2dunrolled!($B, $A, $K)
436+
@assert B ≈ Bcopy "ifort wrong?"
437+
end
402438
end

0 commit comments

Comments
 (0)