39
39
"""
    tothreetuple(i::Int)

Return `(i, i, i)`, repeating a single integer size three times so the caller can
destructure it into three dimensions.
"""
tothreetuple(i::Int) = (i, i, i)
40
40
"""
    tothreetuple(i::NTuple{3,Int})

Return `i` unchanged: an input that is already a 3-tuple of `Int` needs no expansion.
"""
tothreetuple(i::NTuple{3,Int}) = i
41
41
function benchmark_gemm (sizes)
42
- tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFort-loops " , " GFort -intrinsic" , " LoopVectorization" ]
42
+ tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran " , " GFort-intrinsic " , " icc " , " ifort " , " ifort -intrinsic" , " LoopVectorization" ]
43
43
br = BenchmarkResult (tests, sizes)
44
44
for (i,s) ∈ enumerate (sizes)
45
45
M, K, N = tothreetuple (s)
@@ -57,7 +57,13 @@ function benchmark_gemm(sizes)
57
57
@assert C ≈ Cblas " Fort gemm wrong?"
58
58
br[5 ,i] = n_gflop / @belapsed fgemm_builtin! ($ C, $ A, $ B)
59
59
@assert C ≈ Cblas " Fort intrinsic gemm wrong?"
60
- br[6 ,i] = n_gflop / @belapsed gemmavx! ($ C, $ A, $ B)
60
+ br[6 ,i] = n_gflop / @belapsed icgemm_nkm! ($ C, $ A, $ B)
61
+ @assert C ≈ Cblas " icc gemm wrong?"
62
+ br[7 ,i] = n_gflop / @belapsed ifgemm_nkm! ($ C, $ A, $ B)
63
+ @assert C ≈ Cblas " ifort gemm wrong?"
64
+ br[8 ,i] = n_gflop / @belapsed ifgemm_builtin! ($ C, $ A, $ B)
65
+ @assert C ≈ Cblas " ifort intrinsic gemm wrong?"
66
+ br[9 ,i] = n_gflop / @belapsed gemmavx! ($ C, $ A, $ B)
61
67
@assert C ≈ Cblas " LoopVec gemm wrong?"
62
68
# if i % 10 == 0
63
69
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
@@ -67,7 +73,7 @@ function benchmark_gemm(sizes)
67
73
br
68
74
end
69
75
function benchmark_AtmulB (sizes)
70
- tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFort-loops " , " GFort -intrinsic" , " LoopVectorization" ]
76
+ tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran " , " GFort-intrinsic " , " icc " , " ifort " , " ifort -intrinsic" , " LoopVectorization" ]
71
77
br = BenchmarkResult (tests, sizes)
72
78
for (i,s) ∈ enumerate (sizes)
73
79
M, K, N = tothreetuple (s)
@@ -85,7 +91,13 @@ function benchmark_AtmulB(sizes)
85
91
@assert C ≈ Cblas " Fort gemm wrong?"
86
92
br[5 ,i] = n_gflop / @belapsed fAtmulB_builtin! ($ C, $ At, $ B)
87
93
@assert C ≈ Cblas " Fort intrinsic gemm wrong?"
88
- br[6 ,i] = n_gflop / @belapsed jAtmulBavx! ($ C, $ At, $ B)
94
+ br[6 ,i] = n_gflop / @belapsed cAtmulB! ($ C, $ At, $ B)
95
+ @assert C ≈ Cblas " icc gemm wrong?"
96
+ br[7 ,i] = n_gflop / @belapsed ifAtmulB! ($ C, $ At, $ B)
97
+ @assert C ≈ Cblas " iort gemm wrong?"
98
+ br[8 ,i] = n_gflop / @belapsed ifAtmulB_builtin! ($ C, $ At, $ B)
99
+ @assert C ≈ Cblas " ifort intrinsic gemm wrong?"
100
+ br[9 ,i] = n_gflop / @belapsed jAtmulBavx! ($ C, $ At, $ B)
89
101
@assert C ≈ Cblas " LoopVec gemm wrong?"
90
102
# if i % 10 == 0
91
103
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
@@ -96,7 +108,7 @@ function benchmark_AtmulB(sizes)
96
108
end
97
109
98
110
function benchmark_dot (sizes)
99
- tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFort-loops " , " LoopVectorization" ]
111
+ tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran " , " icc " , " ifort " , " LoopVectorization" ]
100
112
br = BenchmarkResult (tests, sizes)
101
113
for (i,s) ∈ enumerate (sizes)
102
114
a = rand (s); b = rand (s);
@@ -109,7 +121,11 @@ function benchmark_dot(sizes)
109
121
@assert cdot (a,b) ≈ dotblas " Polly dot wrong?"
110
122
br[4 ,i] = n_gflop / @belapsed fdot ($ a, $ b)
111
123
@assert fdot (a,b) ≈ dotblas " Fort dot wrong?"
112
- br[5 ,i] = n_gflop / @belapsed jdotavx ($ a, $ b)
124
+ br[5 ,i] = n_gflop / @belapsed icdot ($ a, $ b)
125
+ @assert cdot (a,b) ≈ dotblas " icc dot wrong?"
126
+ br[6 ,i] = n_gflop / @belapsed ifdot ($ a, $ b)
127
+ @assert fdot (a,b) ≈ dotblas " ifort dot wrong?"
128
+ br[7 ,i] = n_gflop / @belapsed jdotavx ($ a, $ b)
113
129
@assert jdotavx (a,b) ≈ dotblas " LoopVec dot wrong?"
114
130
# if i % 10 == 0
115
131
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
@@ -119,7 +135,7 @@ function benchmark_dot(sizes)
119
135
br
120
136
end
121
137
function benchmark_selfdot (sizes)
122
- tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFort-loops " , " LoopVectorization" ]
138
+ tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran " , " icc " , " ifort " , " LoopVectorization" ]
123
139
br = BenchmarkResult (tests, sizes)
124
140
for (i,s) ∈ enumerate (sizes)
125
141
a = rand (s);
@@ -132,7 +148,11 @@ function benchmark_selfdot(sizes)
132
148
@assert cselfdot (a) ≈ dotblas " Polly dot wrong?"
133
149
br[4 ,i] = n_gflop / @belapsed fselfdot ($ a)
134
150
@assert fselfdot (a) ≈ dotblas " Fort dot wrong?"
135
- br[5 ,i] = n_gflop / @belapsed jselfdotavx ($ a)
151
+ br[5 ,i] = n_gflop / @belapsed icselfdot ($ a)
152
+ @assert cselfdot (a) ≈ dotblas " icc dot wrong?"
153
+ br[6 ,i] = n_gflop / @belapsed ifselfdot ($ a)
154
+ @assert fselfdot (a) ≈ dotblas " ifort dot wrong?"
155
+ br[7 ,i] = n_gflop / @belapsed jselfdotavx ($ a)
136
156
@assert jselfdotavx (a) ≈ dotblas " LoopVec dot wrong?"
137
157
# if i % 10 == 0
138
158
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
144
164
"""
    totwotuple(i::Int)

Return `(i, i)`, repeating a single integer size twice so the caller can destructure
it into two dimensions.
"""
totwotuple(i::Int) = (i, i)
145
165
"""
    totwotuple(i::NTuple{2,Int})

Return `i` unchanged: an input that is already a 2-tuple of `Int` needs no expansion.
"""
totwotuple(i::NTuple{2,Int}) = i  # NTuple{2,Int} is the same type as Tuple{Int,Int}
146
166
function benchmark_gemv (sizes)
147
- tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFort-loops " , " LoopVectorization" ]
167
+ tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran " , " icc " , " ifort " , " LoopVectorization" ]
148
168
br = BenchmarkResult (tests, sizes)
149
169
for (i,s) ∈ enumerate (sizes)
150
170
M, N = totwotuple (s)
@@ -158,7 +178,11 @@ function benchmark_gemv(sizes)
158
178
@assert x ≈ xblas " Polly wrong?"
159
179
br[4 ,i] = n_gflop / @belapsed fgemv! ($ x, $ A, $ y)
160
180
@assert x ≈ xblas " Fort wrong?"
161
- br[5 ,i] = n_gflop / @belapsed jgemvavx! ($ x, $ A, $ y)
181
+ br[5 ,i] = n_gflop / @belapsed icgemv! ($ x, $ A, $ y)
182
+ @assert x ≈ xblas " icc wrong?"
183
+ br[6 ,i] = n_gflop / @belapsed ifgemv! ($ x, $ A, $ y)
184
+ @assert x ≈ xblas " ifort wrong?"
185
+ br[7 ,i] = n_gflop / @belapsed jgemvavx! ($ x, $ A, $ y)
162
186
@assert x ≈ xblas " LoopVec wrong?"
163
187
# if i % 10 == 0
164
188
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
@@ -168,7 +192,7 @@ function benchmark_gemv(sizes)
168
192
br
169
193
end
170
194
function benchmark_dot3 (sizes)
171
- tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFort-loops " , " LoopVectorization" ]
195
+ tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran " , " icc " , " ifort " , " LoopVectorization" ]
172
196
br = BenchmarkResult (tests, sizes)
173
197
for (i,s) ∈ enumerate (sizes)
174
198
M, N = totwotuple (s)
@@ -182,7 +206,11 @@ function benchmark_dot3(sizes)
182
206
@assert cdot3 (x, A, y) ≈ dotblas " Polly dot wrong?"
183
207
br[4 ,i] = n_gflop / @belapsed fdot3 ($ x, $ A, $ y)
184
208
@assert fdot3 (x, A, y) ≈ dotblas " Fort dot wrong?"
185
- br[5 ,i] = n_gflop / @belapsed jdot3avx ($ x, $ A, $ y)
209
+ br[5 ,i] = n_gflop / @belapsed icdot3 ($ x, $ A, $ y)
210
+ @assert cdot3 (x, A, y) ≈ dotblas " icc dot wrong?"
211
+ br[6 ,i] = n_gflop / @belapsed ifdot3 ($ x, $ A, $ y)
212
+ @assert fdot3 (x, A, y) ≈ dotblas " ifort dot wrong?"
213
+ br[7 ,i] = n_gflop / @belapsed jdot3avx ($ x, $ A, $ y)
186
214
@assert jdot3avx (x, A, y) ≈ dotblas " LoopVec dot wrong?"
187
215
# if i % 10 == 0
188
216
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
@@ -196,7 +224,7 @@ function sse!(Xβ, y, X, β)
196
224
dot (Xβ, Xβ)
197
225
end
198
226
function benchmark_sse (sizes)
199
- tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFort-loops " , " LoopVectorization" ]
227
+ tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran " , " icc " , " ifort " , " LoopVectorization" ]
200
228
br = BenchmarkResult (tests, sizes)
201
229
for (i,s) ∈ enumerate (sizes)
202
230
N, P = totwotuple (s)
@@ -212,7 +240,11 @@ function benchmark_sse(sizes)
212
240
@assert cOLSlp (y, X, β) ≈ lpblas " Polly wrong?"
213
241
br[4 ,i] = n_gflop / @belapsed fOLSlp ($ y, $ X, $ β)
214
242
@assert fOLSlp (y, X, β) ≈ lpblas " Fort wrong?"
215
- br[5 ,i] = n_gflop / @belapsed jOLSlp_avx ($ y, $ X, $ β)
243
+ br[5 ,i] = n_gflop / @belapsed icOLSlp ($ y, $ X, $ β)
244
+ @assert cOLSlp (y, X, β) ≈ lpblas " icc wrong?"
245
+ br[6 ,i] = n_gflop / @belapsed ifOLSlp ($ y, $ X, $ β)
246
+ @assert fOLSlp (y, X, β) ≈ lpblas " ifort wrong?"
247
+ br[7 ,i] = n_gflop / @belapsed jOLSlp_avx ($ y, $ X, $ β)
216
248
@assert jOLSlp_avx (y, X, β) ≈ lpblas " LoopVec wrong?"
217
249
# if i % 10 == 0
218
250
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
@@ -223,7 +255,7 @@ function benchmark_sse(sizes)
223
255
end
224
256
225
257
function benchmark_exp (sizes)
226
- tests = [" Julia" , " Clang-Polly" , " GFort-loops " , " LoopVectorization" ]
258
+ tests = [" Julia" , " Clang-Polly" , " GFortran " , " icc " , " ifort " , " LoopVectorization" ]
227
259
br = BenchmarkResult (tests, sizes)
228
260
for (i,s) ∈ enumerate (sizes)
229
261
a = rand (s); b = similar (a)
@@ -234,7 +266,11 @@ function benchmark_exp(sizes)
234
266
@assert b ≈ baseb " Clang wrong?"
235
267
br[3 ,i] = n_gflop / @belapsed fvexp! ($ b, $ a)
236
268
@assert b ≈ baseb " Fort wrong?"
237
- br[4 ,i] = n_gflop / @belapsed @avx @. $ b = exp ($ a)
269
+ br[4 ,i] = n_gflop / @belapsed icvexp! ($ b, $ a)
270
+ @assert b ≈ baseb " icc wrong?"
271
+ br[5 ,i] = n_gflop / @belapsed ifvexp! ($ b, $ a)
272
+ @assert b ≈ baseb " ifort wrong?"
273
+ br[6 ,i] = n_gflop / @belapsed @avx @. $ b = exp ($ a)
238
274
@assert b ≈ baseb " LoopVec wrong?"
239
275
# if i % 10 == 0
240
276
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
@@ -245,7 +281,7 @@ function benchmark_exp(sizes)
245
281
end
246
282
247
283
function benchmark_aplusBc (sizes)
248
- tests = [" Julia" , " Clang-Polly" , " GFort-loops " , " LoopVectorization" ]
284
+ tests = [" Julia" , " Clang-Polly" , " GFortran " , " icc " , " ifort " , " LoopVectorization" ]
249
285
br = BenchmarkResult (tests, sizes)
250
286
for (i,s) ∈ enumerate (sizes)
251
287
M, N = totwotuple (s)
@@ -258,7 +294,11 @@ function benchmark_aplusBc(sizes)
258
294
@assert D ≈ Dcopy " Polly wrong?"
259
295
br[3 ,i] = n_gflop / @belapsed faplusBc! ($ D, $ a, $ B, $ c)
260
296
@assert D ≈ Dcopy " Fort wrong?"
261
- br[4 ,i] = n_gflop / @belapsed @avx @. $ D = $ a + $ B * $ c′
297
+ br[4 ,i] = n_gflop / @belapsed icaplusBc! ($ D, $ a, $ B, $ c)
298
+ @assert D ≈ Dcopy " icc wrong?"
299
+ br[5 ,i] = n_gflop / @belapsed ifaplusBc! ($ D, $ a, $ B, $ c)
300
+ @assert D ≈ Dcopy " ifort wrong?"
301
+ br[6 ,i] = n_gflop / @belapsed @avx @. $ D = $ a + $ B * $ c′
262
302
@assert D ≈ Dcopy " LoopVec wrong?"
263
303
# if i % 10 == 0
264
304
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
0 commit comments