@@ -30,6 +30,29 @@ Base.setindex!(br::BenchmarkResult, v, i...) = br.sizedresults.results[i...] = v
30
30
# Display a `BenchmarkResult` by handing its `sizedresults` and `tests` fields to
# `pretty_table`, writing to `io`.
Base.show(io::IO, br::BenchmarkResult) =
    pretty_table(io, br.sizedresults, br.tests)
33
+
34
+ using VegaLite, IndexedTables
35
"""
    plot(br::BenchmarkResult)

Build a table with columns `GFLOPS`, `Size` and `Method` from `br` and pipe it into a
VegaLite line chart (`Size` on x, `GFLOPS` on y, one color per `Method`).

The first entry of `br.tests` is skipped when building the `Method` column
(presumably it labels the size column -- TODO confirm against `BenchmarkResult`).
"""
function plot(br::BenchmarkResult)
    gflops = vec(br.sizedresults.results)
    brsizes = br.sizedresults.sizes
    methodnames = @view(br.tests[2:end])
    nmethods = length(br.tests) - 1

    # Repeat every size once per method so the column lines up with the flattened
    # results (assumes all methods for one size are stored contiguously).
    sizecol = Vector{eltype(brsizes)}(undef, length(gflops))
    slot = 0
    for sz in brsizes, _ in 1:nmethods
        slot += 1
        sizecol[slot] = sz
    end

    # The list of method names is repeated once per size.
    methodcol = vcat(fill(methodnames, length(brsizes))...)

    t = table((GFLOPS = gflops, Size = sizecol, Method = methodcol))
    return t |> @vlplot(:line, x = :Size, y = :GFLOPS, color = :Method)
end
55
+
33
56
function alloc_matrices (s:: NTuple{3,Int} )
34
57
M, K, N = s
35
58
C = Matrix {Float64} (undef, M, N)
@@ -38,8 +61,8 @@ function alloc_matrices(s::NTuple{3,Int})
38
61
C, A, B
39
62
end
40
63
# Square case: a single dimension `s` is forwarded as the triple `(s, s, s)`.
function alloc_matrices(s::Int)
    return alloc_matrices((s, s, s))
end
41
- gflop (s:: Int ) = s^ 3 * 1e -9
42
- gflop (s:: NTuple{3,Int} ) = prod (s) * 1e -9
64
"""
    gflop(s::Int)
    gflop(s::NTuple{3,Int})

Return the product of the three dimensions in `s` multiplied by `2e-9`. A single
`Int` is treated as the cubic case `(s, s, s)`.

Presumably this is the work of one matrix multiply in billions of floating-point
operations, counting the multiply and the add separately -- confirm against callers.
"""
gflop(s::NTuple{3,Int}) = 2e-9 * prod(s)
gflop(s::Int) = gflop((s, s, s))
43
66
function benchmark_gemm (sizes)
44
67
tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFort-loops" , " GFort-intrinsic" , " LoopVectorization" ]
45
68
br = BenchmarkResult (tests, sizes)
@@ -61,27 +84,108 @@ function benchmark_gemm(sizes)
61
84
end
62
85
br
63
86
end
87
"""
    benchmark_dot(sizes)

Benchmark five dot-product implementations for every entry of `sizes` and return the
filled `BenchmarkResult`.

For each size `s` two `rand(s)` inputs are drawn. Rows 1-5 hold `2e-9 * s` divided by
the `@belapsed` time of `dot`, `jdot`, `cdot`, `fdot` and `jdotavx` respectively, and
every non-BLAS kernel is asserted to agree (`≈`) with the `dot` result.
"""
function benchmark_dot(sizes)
    blaslabel = BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS"
    tests = [blaslabel, "Julia", "Clang-Polly", "GFort-loops", "LoopVectorization"]
    results = BenchmarkResult(tests, sizes)
    for (col, len) in enumerate(sizes)
        x = rand(len)
        y = rand(len)
        work = 2e-9 * len

        results[1, col] = work / @belapsed dot($x, $y)
        reference = dot(x, y)

        results[2, col] = work / @belapsed jdot($x, $y)
        @assert jdot(x, y) ≈ reference "Julia dot wrong?"

        results[3, col] = work / @belapsed cdot($x, $y)
        @assert cdot(x, y) ≈ reference "Polly dot wrong?"

        results[4, col] = work / @belapsed fdot($x, $y)
        @assert fdot(x, y) ≈ reference "Fort dot wrong?"

        results[5, col] = work / @belapsed jdotavx($x, $y)
        @assert jdotavx(x, y) ≈ reference "LoopVec dot wrong?"
    end
    return results
end
106
"""
    benchmark_selfdot(sizes)

Benchmark five implementations of a vector's dot product with itself for every entry
of `sizes` and return the filled `BenchmarkResult`.

For each size `s` one `rand(s)` input is drawn. Rows 1-5 hold `2e-9 * s` divided by
the `@belapsed` time of `dot(a, a)`, `jselfdot`, `cselfdot`, `fselfdot` and
`jselfdotavx` respectively, and every non-BLAS kernel is asserted to agree (`≈`) with
the `dot` result.
"""
function benchmark_selfdot(sizes)
    blaslabel = BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS"
    tests = [blaslabel, "Julia", "Clang-Polly", "GFort-loops", "LoopVectorization"]
    results = BenchmarkResult(tests, sizes)
    for (col, len) in enumerate(sizes)
        x = rand(len)
        work = 2e-9 * len

        results[1, col] = work / @belapsed dot($x, $x)
        reference = dot(x, x)

        results[2, col] = work / @belapsed jselfdot($x)
        @assert jselfdot(x) ≈ reference "Julia dot wrong?"

        results[3, col] = work / @belapsed cselfdot($x)
        @assert cselfdot(x) ≈ reference "Polly dot wrong?"

        results[4, col] = work / @belapsed fselfdot($x)
        @assert fselfdot(x) ≈ reference "Fort dot wrong?"

        results[5, col] = work / @belapsed jselfdotavx($x)
        @assert jselfdotavx(x) ≈ reference "LoopVec dot wrong?"
    end
    return results
end
125
# Normalize a size specification to a pair of `Int`s: an existing pair is returned
# unchanged, a single `Int` is duplicated.
totwotuple(i::NTuple{2,Int}) = i
function totwotuple(i::Int)
    return (i, i)
end
127
"""
    sse!(Xβ, y, X, β)

Overwrite the buffer `Xβ` with `X * β - y` and return `dot(Xβ, Xβ)`.
"""
function sse!(Xβ, y, X, β)
    # Seed the buffer with `y`, then let the 5-argument `mul!` compute
    # `Xβ = X*β*1.0 + Xβ*(-1.0)` in place.
    copyto!(Xβ, y)
    mul!(Xβ, X, β, 1.0, -1.0)
    return dot(Xβ, Xβ)
end
131
"""
    benchmark_sse(sizes)

Benchmark five implementations of the sum-of-squared-errors kernel for every entry
of `sizes` and return the filled `BenchmarkResult`.

Each size is normalized with `totwotuple` to `(N, P)`; `y = rand(N)`, `β = rand(P)`
and `X = randn(N, P)` are drawn per size. Rows 1-5 hold `2e-9 * (P*N + 2N)` divided by
the `@belapsed` time of `sse!`, `jOLSlp`, `cOLSlp`, `fOLSlp` and `jOLSlp_avx`
respectively, and every non-BLAS kernel is asserted to agree (`≈`) with `sse!`.
"""
function benchmark_sse(sizes)
    # Exactly one label per row written below. There is no "GFort-intrinsic" kernel
    # in this benchmark, so listing that label would shift "LoopVectorization" onto a
    # row that is never filled.
    tests = [
        BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS",
        "Julia",
        "Clang-Polly",
        "GFort-loops",
        "LoopVectorization",
    ]
    br = BenchmarkResult(tests, sizes)
    for (i, s) in enumerate(sizes)
        N, P = totwotuple(s)
        y = rand(N)
        β = rand(P)
        X = randn(N, P)
        Xβ = similar(y)  # scratch buffer overwritten by `sse!`
        n_gflop = 2e-9 * (P * N + 2N)

        br[1, i] = n_gflop / @belapsed sse!($Xβ, $y, $X, $β)
        lpblas = sse!(Xβ, y, X, β)  # reference value for the checks below

        br[2, i] = n_gflop / @belapsed jOLSlp($y, $X, $β)
        @assert jOLSlp(y, X, β) ≈ lpblas "Julia wrong?"

        br[3, i] = n_gflop / @belapsed cOLSlp($y, $X, $β)
        @assert cOLSlp(y, X, β) ≈ lpblas "Polly wrong?"

        br[4, i] = n_gflop / @belapsed fOLSlp($y, $X, $β)
        @assert fOLSlp(y, X, β) ≈ lpblas "Fort wrong?"

        br[5, i] = n_gflop / @belapsed jOLSlp_avx($y, $X, $β)
        @assert jOLSlp_avx(y, X, β) ≈ lpblas "LoopVec wrong?"
    end
    return br
end
64
153
65
- using VegaLite, IndexedTables
66
- function plot (br:: BenchmarkResult )
67
- res = vec (br. sizedresults. results)
68
- brsizes = br. sizedresults. sizes
69
- sizes = Vector {eltype(brsizes)} (undef, length (res))
70
- ntests = length (br. tests) - 1
71
- for i ∈ 0 : length (brsizes)- 1
72
- si = brsizes[i+ 1 ]
73
- for j ∈ 1 : ntests
74
- sizes[j + i* ntests] = si
75
- end
154
"""
    benchmark_exp(sizes)

Benchmark three element-wise `exp` implementations for every entry of `sizes` and
return the filled `BenchmarkResult`.

For each size `s` the input is `rand(s)`. Rows 1-3 hold `s` divided by the `@belapsed`
time of the broadcast `@. b = exp(a)`, `fvexp!`, and the `@avx` broadcast
respectively. After each of the last two runs the output buffer is asserted to agree
(`≈`) with a copy of the plain broadcast's output.
"""
function benchmark_exp(sizes)
    tests = ["Julia", "GFort-loops", "LoopVectorization"]
    results = BenchmarkResult(tests, sizes)
    for (col, len) in enumerate(sizes)
        src = rand(len)
        dest = similar(src)
        nelem = len # not really gflops

        results[1, col] = nelem / @belapsed @. $dest = exp($src)
        expected = copy(dest)

        results[2, col] = nelem / @belapsed fvexp!($dest, $src)
        @assert dest ≈ expected "Fort wrong?"

        results[3, col] = nelem / @belapsed @avx @. $dest = exp($src)
        @assert dest ≈ expected "LoopVec wrong?"
    end
    return results
end
169
+
170
"""
    benchmark_aplusBc(sizes)

Benchmark four implementations of `D = a + B * c'` (element-wise, broadcast) for
every entry of `sizes` and return the filled `BenchmarkResult`.

Each size is normalized with `totwotuple` to `(M, N)`; `a = rand(M)`, `B = rand(M, N)`
and `c = rand(N)` are drawn per size. Rows 1-4 hold `2e-9 * M * N` divided by the
`@belapsed` time of the plain broadcast, `caplusBc!`, `faplusBc!` and the `@avx`
broadcast respectively. After each of the last three runs `D` is asserted to agree
(`≈`) with a copy of the plain broadcast's output.
"""
function benchmark_aplusBc(sizes)
    tests = ["Julia", "Clang-Polly", "GFort-loops", "LoopVectorization"]
    results = BenchmarkResult(tests, sizes)
    for (k, sz) in enumerate(sizes)
        nrows, ncols = totwotuple(sz)
        a = rand(nrows)
        B = rand(nrows, ncols)
        c = rand(ncols)
        ct = c'  # adjoint of `c`, used by the broadcast variants
        D = similar(B)
        work = 2e-9 * nrows * ncols

        results[1, k] = work / @belapsed @. $D = $a + $B * $ct
        expected = copy(D)

        results[2, k] = work / @belapsed caplusBc!($D, $a, $B, $c)
        @assert D ≈ expected "Polly wrong?"

        results[3, k] = work / @belapsed faplusBc!($D, $a, $B, $c)
        @assert D ≈ expected "Fort wrong?"

        results[4, k] = work / @belapsed @avx @. $D = $a + $B * $ct
        @assert D ≈ expected "LoopVec wrong?"
    end
    return results
end
86
189
87
190
191
+
0 commit comments