@@ -75,7 +75,9 @@ function mygemmavx!(C, A, B)
75
75
C[i,j] = Cᵢⱼ
76
76
end
77
77
end
78
- C = Matrix {Float64} (undef, 100 , 100 ); A = randn (100 , 100 ); B = randn (100 , 100 );
78
+
79
+ M, K, N = rand (70 : 81 , 3 );
80
+ C = Matrix {Float64} (undef, M, N); A = randn (M, K); B = randn (K, N);
79
81
C2 = similar (C);
80
82
mygemmavx! (C, A, B)
81
83
mygemm! (C2, A, B)
@@ -97,7 +99,7 @@ LoopVectorization.choose_unroll_order(lsgemm)
97
99
ops = LoopVectorization. oporder (lsgemm);
98
100
findall (length .(ops) .!= 0 )
99
101
100
- dotq = :(for i ∈ eachindex (a)
102
+ dotq = :(for i ∈ eachindex (a,b )
101
103
s += a[i]* b[i]
102
104
end )
103
105
lsdot = LoopVectorization. LoopSet (dotq);
@@ -106,17 +108,15 @@ LoopVectorization.lower(lsdot)
106
108
lsdot. operations
107
109
108
110
"""
    mydot(a, b)

Return the dot product of `a` and `b`, i.e. the sum of `a[i] * b[i]` over their
shared indices, accumulated starting from `0.0`.

The loop iterates over `eachindex(a, b)`, which throws a `DimensionMismatch` when
the two arrays do not have the same indices. That shared index set is what makes
the `@inbounds` annotation safe, so no separate length assertion is needed.
"""
function mydot(a, b)
    s = 0.0
    # `@simd` allows the floating-point reduction to be reordered, so the result may
    # differ from a strictly sequential sum in the last bits.
    @inbounds @simd for i ∈ eachindex(a, b)
        s += a[i] * b[i]
    end
    return s
end
116
117
function mydotavx (a, b)
117
- @assert length (a) == length (b) " Both arrays must be of equal length."
118
118
s = 0.0
119
- @avx for i ∈ eachindex (a)
119
+ @avx for i ∈ eachindex (a,b )
120
120
s += a[i]* b[i]
121
121
end
122
122
s
@@ -153,13 +153,12 @@ function myselfdotavx(a)
153
153
s
154
154
end
155
155
156
- a = rand (400 ); b = rand ( 400 );
156
+ a = rand (400 );
157
157
@test myselfdotavx (a) ≈ myselfdot (a)
158
158
159
159
@benchmark myselfdotavx ($ a)
160
160
@benchmark myselfdot ($ a)
161
161
162
- b = rand (43 );
163
162
@benchmark myselfdotavx ($ b)
164
163
@benchmark myselfdot ($ b)
165
164
192
191
all (b1 .≈ b2)
193
192
@test all (b1 .≈ b2)
194
193
194
+ @benchmark myvexp! ($ b1, $ a)
195
+ @benchmark myvexpavx! ($ b2, $ a)
196
+
195
197
196
198
vexpsq = :(for i ∈ eachindex (a)
197
199
s += exp (a[i])
218
220
219
221
@test myvexp (a) ≈ myvexpavx (a)
220
222
221
-
223
+ @benchmark myvexp ($ a)
224
+ @benchmark myvexpavx ($ a)
222
225
223
226
gemvq = :(for i ∈ eachindex (y)
224
227
yᵢ = 0.0
@@ -258,6 +261,43 @@ mygemvavx!(y2, A, x)
258
261
259
262
@test all (y1 .≈ y2)
260
263
264
+ @benchmark mygemv! ($ y1, $ A, $ x)
265
+ @benchmark mygemvavx! ($ y2, $ A, $ x)
266
+
267
# Quoted loop nest computing `B[j,i] = A[j,i] - x[j]`; it is handed to
# LoopVectorization's `LoopSet` so the analysis of this nest can be inspected.
subcolq = :(for i ∈ 1:size(A,2), j ∈ eachindex(x)
    B[j,i] = A[j,i] - x[j]
end)
lssubcol = LoopVectorization.LoopSet(subcolq);
# Pin the result of `choose_order` for this nest.
# NOTE(review): presumably (loop order, unroll factor, tile factor) — confirm against
# the LoopVectorization internals for the version in use.
@test LoopVectorization.choose_order(lssubcol) == (Symbol[:j, :i], 4, -1)
LoopVectorization.lower(lssubcol)
273
+
274
"""
    mysubcol!(B, A, x)

Write `B[j, i] = A[j, i] - x[j]` for every column `i` of `A` and every index `j`
of `x`, mutating `B` in place. Returns `nothing`.

# Throws
- `BoundsError` if the index region `eachindex(x)` × `axes(A, 2)` is not valid for
  both `A` and `B`.
"""
function mysubcol!(B, A, x)
    rows, cols = eachindex(x), axes(A, 2)
    # The loops below run under `@inbounds`, so validate the whole index region once
    # up front rather than risk out-of-bounds reads/writes on mismatched sizes.
    checkbounds(A, rows, cols)
    checkbounds(B, rows, cols)
    @inbounds for i ∈ cols
        # The inner loop runs over the first index of `A` and `B`.
        @simd for j ∈ rows
            B[j, i] = A[j, i] - x[j]
        end
    end
    return nothing
end
281
"""
    mysubcolavx!(B, A, x)

`@avx` version of the column-subtraction kernel: writes `B[j, i] = A[j, i] - x[j]`
for every column `i` of `A` and every index `j` of `x`, mutating `B` in place.

# Throws
- `BoundsError` if the index region `eachindex(x)` × `1:size(A, 2)` is not valid for
  both `A` and `B`.
"""
function mysubcolavx!(B, A, x)
    # NOTE(review): `@avx` is understood not to bounds-check the accesses it
    # generates (confirm against the LoopVectorization docs), so the index region is
    # validated once here before entering the loop.
    checkbounds(A, eachindex(x), 1:size(A, 2))
    checkbounds(B, eachindex(x), 1:size(A, 2))
    # Loop header is kept in its original form so `@avx` sees the same expression.
    @avx for i ∈ 1:size(A,2), j ∈ eachindex(x)
        B[j,i] = A[j,i] - x[j]
    end
end
286
# Exercise both column-subtraction kernels on a 199×498 random matrix, with `x`
# holding one value per row of `A`.
A = randn(199, 498); x = randn(size(A, 1));
B1 = similar(A); B2 = similar(A);

mysubcol!(B1, A, x)
mysubcolavx!(B2, A, x)

# The plain-loop and `@avx` results must agree element-wise (approximately).
@test all(B1 .≈ B2)

@benchmark mysubcol!($B1, $A, $x)
@benchmark mysubcolavx!($B2, $A, $x)

# Dump the native code generated for each kernel for side-by-side inspection.
@code_native debuginfo=:none mysubcol!(B1, A, x)
@code_native debuginfo=:none mysubcolavx!(B2, A, x)
299
+
300
+
261
301
lsgemv. preamble
262
302
LoopVectorization. lower (lsgemv)
263
303
LoopVectorization. lower_unrolled (lsgemv, 4 );
0 commit comments