Skip to content

Commit 516216b

Browse files
committed
Update compat and versions, add intel compiler to benchmarks.
1 parent bb75698 commit 516216b

File tree

5 files changed

+261
-209
lines changed

5 files changed

+261
-209
lines changed

Manifest.toml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@ version = "0.2.2"
1111

1212
[[DataStructures]]
1313
deps = ["InteractiveUtils", "OrderedCollections"]
14-
git-tree-sha1 = "f784254f428fb8fd7ac15982e5862a38a44523d3"
14+
git-tree-sha1 = "b7720de347734f4716d1815b00ce5664ed6bbfd4"
1515
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
16-
version = "0.17.7"
16+
version = "0.17.9"
1717

1818
[[Distributed]]
1919
deps = ["Random", "Serialization", "Sockets"]
@@ -61,15 +61,15 @@ uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
6161

6262
[[SIMDPirates]]
6363
deps = ["MacroTools", "VectorizationBase"]
64-
git-tree-sha1 = "c0f42ddb2645c54b8620979df5dc979c4742db59"
64+
git-tree-sha1 = "910193d289b41e570118c4e444f0c05cc700a2f7"
6565
uuid = "21efa798-c60a-11e8-04d3-e1a92915a26a"
66-
version = "0.1.4"
66+
version = "0.1.5"
6767

6868
[[SLEEFPirates]]
6969
deps = ["SIMDPirates", "VectorizationBase"]
70-
git-tree-sha1 = "547bcf7d30967d87d4c62b3fe5efdb0e57a6e436"
70+
git-tree-sha1 = "4733445246d3d5536c7aee1bffb55ab37b88347b"
7171
uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa"
72-
version = "0.1.2"
72+
version = "0.1.3"
7373

7474
[[Serialization]]
7575
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
@@ -83,6 +83,6 @@ uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
8383

8484
[[VectorizationBase]]
8585
deps = ["CpuId", "LinearAlgebra"]
86-
git-tree-sha1 = "81c1b3171d93e64345d75a9f08d190a155e9f009"
86+
git-tree-sha1 = "a2576763aa20968ffb5668e2e15d45ae8e364d05"
8787
uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
88-
version = "0.1.7"
88+
version = "0.1.9"

Project.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
1414
[compat]
1515
MacroTools = "0.5"
1616
Parameters = "0.12.0"
17-
SIMDPirates = "0.1.4"
18-
SLEEFPirates = "0.1.2"
19-
VectorizationBase = "0.1.7"
17+
SIMDPirates = "0.1.5"
18+
SLEEFPirates = "0.1.3"
19+
VectorizationBase = "0.1.9"
2020
julia = "1.3.0"
2121

2222
[extras]

benchmark/benchmarkflops.jl

Lines changed: 58 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ end
3939
tothreetuple(i::Int) = (i,i,i)
4040
tothreetuple(i::NTuple{3,Int}) = i
4141
function benchmark_gemm(sizes)
42-
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFort-loops", "GFort-intrinsic", "LoopVectorization"]
42+
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFortran", "GFort-intrinsic", "icc", "ifort", "ifort-intrinsic", "LoopVectorization"]
4343
br = BenchmarkResult(tests, sizes)
4444
for (i,s) ∈ enumerate(sizes)
4545
M, K, N = tothreetuple(s)
@@ -57,7 +57,13 @@ function benchmark_gemm(sizes)
5757
@assert C ≈ Cblas "Fort gemm wrong?"
5858
br[5,i] = n_gflop / @belapsed fgemm_builtin!($C, $A, $B)
5959
@assert C ≈ Cblas "Fort intrinsic gemm wrong?"
60-
br[6,i] = n_gflop / @belapsed gemmavx!($C, $A, $B)
60+
br[6,i] = n_gflop / @belapsed icgemm_nkm!($C, $A, $B)
61+
@assert C ≈ Cblas "icc gemm wrong?"
62+
br[7,i] = n_gflop / @belapsed ifgemm_nkm!($C, $A, $B)
63+
@assert C ≈ Cblas "ifort gemm wrong?"
64+
br[8,i] = n_gflop / @belapsed ifgemm_builtin!($C, $A, $B)
65+
@assert C ≈ Cblas "ifort intrinsic gemm wrong?"
66+
br[9,i] = n_gflop / @belapsed gemmavx!($C, $A, $B)
6167
@assert C ≈ Cblas "LoopVec gemm wrong?"
6268
# if i % 10 == 0
6369
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
@@ -67,7 +73,7 @@ function benchmark_gemm(sizes)
6773
br
6874
end
6975
function benchmark_AtmulB(sizes)
70-
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFort-loops", "GFort-intrinsic", "LoopVectorization"]
76+
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFortran", "GFort-intrinsic", "icc", "ifort", "ifort-intrinsic", "LoopVectorization"]
7177
br = BenchmarkResult(tests, sizes)
7278
for (i,s) ∈ enumerate(sizes)
7379
M, K, N = tothreetuple(s)
@@ -85,7 +91,13 @@ function benchmark_AtmulB(sizes)
8591
@assert C ≈ Cblas "Fort gemm wrong?"
8692
br[5,i] = n_gflop / @belapsed fAtmulB_builtin!($C, $At, $B)
8793
@assert C ≈ Cblas "Fort intrinsic gemm wrong?"
88-
br[6,i] = n_gflop / @belapsed jAtmulBavx!($C, $At, $B)
94+
br[6,i] = n_gflop / @belapsed cAtmulB!($C, $At, $B)
95+
@assert C ≈ Cblas "icc gemm wrong?"
96+
br[7,i] = n_gflop / @belapsed ifAtmulB!($C, $At, $B)
97+
@assert C ≈ Cblas "ifort gemm wrong?"
98+
br[8,i] = n_gflop / @belapsed ifAtmulB_builtin!($C, $At, $B)
99+
@assert C ≈ Cblas "ifort intrinsic gemm wrong?"
100+
br[9,i] = n_gflop / @belapsed jAtmulBavx!($C, $At, $B)
89101
@assert C ≈ Cblas "LoopVec gemm wrong?"
90102
# if i % 10 == 0
91103
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
@@ -96,7 +108,7 @@ function benchmark_AtmulB(sizes)
96108
end
97109

98110
function benchmark_dot(sizes)
99-
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFort-loops", "LoopVectorization"]
111+
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFortran", "icc", "ifort", "LoopVectorization"]
100112
br = BenchmarkResult(tests, sizes)
101113
for (i,s) ∈ enumerate(sizes)
102114
a = rand(s); b = rand(s);
@@ -109,7 +121,11 @@ function benchmark_dot(sizes)
109121
@assert cdot(a,b) ≈ dotblas "Polly dot wrong?"
110122
br[4,i] = n_gflop / @belapsed fdot($a, $b)
111123
@assert fdot(a,b) ≈ dotblas "Fort dot wrong?"
112-
br[5,i] = n_gflop / @belapsed jdotavx($a, $b)
124+
br[5,i] = n_gflop / @belapsed icdot($a, $b)
125+
@assert cdot(a,b) ≈ dotblas "icc dot wrong?"
126+
br[6,i] = n_gflop / @belapsed ifdot($a, $b)
127+
@assert fdot(a,b) ≈ dotblas "ifort dot wrong?"
128+
br[7,i] = n_gflop / @belapsed jdotavx($a, $b)
113129
@assert jdotavx(a,b) ≈ dotblas "LoopVec dot wrong?"
114130
# if i % 10 == 0
115131
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
@@ -119,7 +135,7 @@ function benchmark_dot(sizes)
119135
br
120136
end
121137
function benchmark_selfdot(sizes)
122-
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFort-loops", "LoopVectorization"]
138+
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFortran", "icc", "ifort", "LoopVectorization"]
123139
br = BenchmarkResult(tests, sizes)
124140
for (i,s) ∈ enumerate(sizes)
125141
a = rand(s);
@@ -132,7 +148,11 @@ function benchmark_selfdot(sizes)
132148
@assert cselfdot(a) ≈ dotblas "Polly dot wrong?"
133149
br[4,i] = n_gflop / @belapsed fselfdot($a)
134150
@assert fselfdot(a) ≈ dotblas "Fort dot wrong?"
135-
br[5,i] = n_gflop / @belapsed jselfdotavx($a)
151+
br[5,i] = n_gflop / @belapsed icselfdot($a)
152+
@assert cselfdot(a) ≈ dotblas "icc dot wrong?"
153+
br[6,i] = n_gflop / @belapsed ifselfdot($a)
154+
@assert fselfdot(a) ≈ dotblas "ifort dot wrong?"
155+
br[7,i] = n_gflop / @belapsed jselfdotavx($a)
136156
@assert jselfdotavx(a) ≈ dotblas "LoopVec dot wrong?"
137157
# if i % 10 == 0
138158
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
@@ -144,7 +164,7 @@ end
144164
totwotuple(i::Int) = (i,i)
145165
totwotuple(i::Tuple{Int,Int}) = i
146166
function benchmark_gemv(sizes)
147-
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFort-loops", "LoopVectorization"]
167+
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFortran", "icc", "ifort", "LoopVectorization"]
148168
br = BenchmarkResult(tests, sizes)
149169
for (i,s) ∈ enumerate(sizes)
150170
M, N = totwotuple(s)
@@ -158,7 +178,11 @@ function benchmark_gemv(sizes)
158178
@assert x ≈ xblas "Polly wrong?"
159179
br[4,i] = n_gflop / @belapsed fgemv!($x, $A, $y)
160180
@assert x ≈ xblas "Fort wrong?"
161-
br[5,i] = n_gflop / @belapsed jgemvavx!($x, $A, $y)
181+
br[5,i] = n_gflop / @belapsed icgemv!($x, $A, $y)
182+
@assert x ≈ xblas "icc wrong?"
183+
br[6,i] = n_gflop / @belapsed ifgemv!($x, $A, $y)
184+
@assert x ≈ xblas "ifort wrong?"
185+
br[7,i] = n_gflop / @belapsed jgemvavx!($x, $A, $y)
162186
@assert x ≈ xblas "LoopVec wrong?"
163187
# if i % 10 == 0
164188
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
@@ -168,7 +192,7 @@ function benchmark_gemv(sizes)
168192
br
169193
end
170194
function benchmark_dot3(sizes)
171-
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFort-loops", "LoopVectorization"]
195+
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFortran", "icc", "ifort", "LoopVectorization"]
172196
br = BenchmarkResult(tests, sizes)
173197
for (i,s) ∈ enumerate(sizes)
174198
M, N = totwotuple(s)
@@ -182,7 +206,11 @@ function benchmark_dot3(sizes)
182206
@assert cdot3(x, A, y) ≈ dotblas "Polly dot wrong?"
183207
br[4,i] = n_gflop / @belapsed fdot3($x, $A, $y)
184208
@assert fdot3(x, A, y) ≈ dotblas "Fort dot wrong?"
185-
br[5,i] = n_gflop / @belapsed jdot3avx($x, $A, $y)
209+
br[5,i] = n_gflop / @belapsed icdot3($x, $A, $y)
210+
@assert cdot3(x, A, y) ≈ dotblas "icc dot wrong?"
211+
br[6,i] = n_gflop / @belapsed ifdot3($x, $A, $y)
212+
@assert fdot3(x, A, y) ≈ dotblas "ifort dot wrong?"
213+
br[7,i] = n_gflop / @belapsed jdot3avx($x, $A, $y)
186214
@assert jdot3avx(x, A, y) ≈ dotblas "LoopVec dot wrong?"
187215
# if i % 10 == 0
188216
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
@@ -196,7 +224,7 @@ function sse!(Xβ, y, X, β)
196224
dot(Xβ, Xβ)
197225
end
198226
function benchmark_sse(sizes)
199-
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFort-loops", "LoopVectorization"]
227+
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFortran", "icc", "ifort", "LoopVectorization"]
200228
br = BenchmarkResult(tests, sizes)
201229
for (i,s) ∈ enumerate(sizes)
202230
N, P = totwotuple(s)
@@ -212,7 +240,11 @@ function benchmark_sse(sizes)
212240
@assert cOLSlp(y, X, β) ≈ lpblas "Polly wrong?"
213241
br[4,i] = n_gflop / @belapsed fOLSlp($y, $X, $β)
214242
@assert fOLSlp(y, X, β) ≈ lpblas "Fort wrong?"
215-
br[5,i] = n_gflop / @belapsed jOLSlp_avx($y, $X, $β)
243+
br[5,i] = n_gflop / @belapsed icOLSlp($y, $X, $β)
244+
@assert cOLSlp(y, X, β) ≈ lpblas "icc wrong?"
245+
br[6,i] = n_gflop / @belapsed ifOLSlp($y, $X, $β)
246+
@assert fOLSlp(y, X, β) ≈ lpblas "ifort wrong?"
247+
br[7,i] = n_gflop / @belapsed jOLSlp_avx($y, $X, $β)
216248
@assert jOLSlp_avx(y, X, β) ≈ lpblas "LoopVec wrong?"
217249
# if i % 10 == 0
218250
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
@@ -223,7 +255,7 @@ function benchmark_sse(sizes)
223255
end
224256

225257
function benchmark_exp(sizes)
226-
tests = ["Julia", "Clang-Polly", "GFort-loops", "LoopVectorization"]
258+
tests = ["Julia", "Clang-Polly", "GFortran", "icc", "ifort", "LoopVectorization"]
227259
br = BenchmarkResult(tests, sizes)
228260
for (i,s) ∈ enumerate(sizes)
229261
a = rand(s); b = similar(a)
@@ -234,7 +266,11 @@ function benchmark_exp(sizes)
234266
@assert b ≈ baseb "Clang wrong?"
235267
br[3,i] = n_gflop / @belapsed fvexp!($b, $a)
236268
@assert b ≈ baseb "Fort wrong?"
237-
br[4,i] = n_gflop / @belapsed @avx @. $b = exp($a)
269+
br[4,i] = n_gflop / @belapsed icvexp!($b, $a)
270+
@assert b ≈ baseb "icc wrong?"
271+
br[5,i] = n_gflop / @belapsed ifvexp!($b, $a)
272+
@assert b ≈ baseb "ifort wrong?"
273+
br[6,i] = n_gflop / @belapsed @avx @. $b = exp($a)
238274
@assert b ≈ baseb "LoopVec wrong?"
239275
# if i % 10 == 0
240276
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
@@ -245,7 +281,7 @@ function benchmark_exp(sizes)
245281
end
246282

247283
function benchmark_aplusBc(sizes)
248-
tests = ["Julia", "Clang-Polly", "GFort-loops", "LoopVectorization"]
284+
tests = ["Julia", "Clang-Polly", "GFortran", "icc", "ifort", "LoopVectorization"]
249285
br = BenchmarkResult(tests, sizes)
250286
for (i,s) ∈ enumerate(sizes)
251287
M, N = totwotuple(s)
@@ -258,7 +294,11 @@ function benchmark_aplusBc(sizes)
258294
@assert D ≈ Dcopy "Polly wrong?"
259295
br[3,i] = n_gflop / @belapsed faplusBc!($D, $a, $B, $c)
260296
@assert D ≈ Dcopy "Fort wrong?"
261-
br[4,i] = n_gflop / @belapsed @avx @. $D = $a + $B * $c′
297+
br[4,i] = n_gflop / @belapsed icaplusBc!($D, $a, $B, $c)
298+
@assert D ≈ Dcopy "icc wrong?"
299+
br[5,i] = n_gflop / @belapsed ifaplusBc!($D, $a, $B, $c)
300+
@assert D ≈ Dcopy "ifort wrong?"
301+
br[6,i] = n_gflop / @belapsed @avx @. $D = $a + $B * $c′
262302
@assert D ≈ Dcopy "LoopVec wrong?"
263303
# if i % 10 == 0
264304
# percent_complete = round(100i/ length(sizes), sigdigits = 4)

benchmark/driver.jl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# includet(joinpath(LOOPVECBENCHDIR, "driver.jl"))
44

55
pkgdir(pkg::String) = abspath(joinpath(dirname(Base.find_package(pkg)), ".."))
6-
const LOOPVECBENCHDIR = joinpath(pkgdir("LoopVectorization"), "benchmarks")
6+
const LOOPVECBENCHDIR = joinpath(pkgdir("LoopVectorization"), "benchmark")
77
include(joinpath(LOOPVECBENCHDIR, "benchmarkflops.jl"))
88
include(joinpath(LOOPVECBENCHDIR, "plotbenchmarks.jl"))
99

@@ -14,8 +14,9 @@ addprocs(9);
1414

1515
@everywhere begin
1616
pkgdir(pkg::String) = abspath(joinpath(dirname(Base.find_package(pkg)), ".."))
17-
const LOOPVECBENCHDIR = joinpath(pkgdir("LoopVectorization"), "benchmarks")
17+
const LOOPVECBENCHDIR = joinpath(pkgdir("LoopVectorization"), "benchmark")
1818
include(joinpath(LOOPVECBENCHDIR, "benchmarkflops.jl"))
19+
BenchmarkTools.DEFAULT_PARAMETERS.seconds = 1
1920
end
2021

2122
gemm_future = @spawnat 2 benchmark_gemm(2:256);

0 commit comments

Comments
 (0)