Skip to content

Commit c1ce6ef

Browse files
committed
Faster gemm tests.
1 parent 8967f15 commit c1ce6ef

File tree

1 file changed

+11
-11
lines changed

1 file changed

+11
-11
lines changed

test/gemm.jl

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@
7272
end
7373
end
7474
function AmulBavx1!(C, A, B)
75-
@avx for m ∈ 1:size(A,1), n ∈ axes(B,2)
75+
@avx unroll=(1,2) for m ∈ 1:size(A,1), n ∈ axes(B,2)
7676
Cₘₙ = zero(eltype(C))
7777
for k ∈ 1:size(A,2)
7878
Cₘₙ += A[m,k] * B[k,n]
@@ -82,15 +82,15 @@
8282
end
8383
function AmulBavx2!(C, A, B)
8484
z = zero(eltype(C))
85-
@avx for m ∈ axes(A,1), n ∈ axes(B,2)
85+
@avx unroll=(2,1) for m ∈ axes(A,1), n ∈ axes(B,2)
8686
C[m,n] = z
8787
for k ∈ axes(A,2)
8888
C[m,n] += A[m,k] * B[k,n]
8989
end
9090
end
9191
end
9292
function AmulBavx3!(C, A, B)
93-
@avx for m ∈ axes(A,1), n ∈ axes(B,2)
93+
@avx unroll=(2,2) for m ∈ axes(A,1), n ∈ axes(B,2)
9494
C[m,n] = zero(eltype(C))
9595
for k ∈ axes(A,2)
9696
C[m,n] += A[m,k] * B[k,n]
@@ -115,7 +115,7 @@
115115
# C[m,n] += ΔCₘₙ * factor
116116
# end;
117117
function AmuladdBavx!(C, A, B, α = one(eltype(C)))
118-
@avx for m ∈ axes(A,1), n ∈ axes(B,2)
118+
@avx unroll=(2,2) for m ∈ axes(A,1), n ∈ axes(B,2)
119119
ΔCₘₙ = zero(eltype(C))
120120
for k ∈ axes(A,2)
121121
ΔCₘₙ += A[m,k] * B[k,n]
@@ -124,7 +124,7 @@
124124
end
125125
end
126126
function AmuladdBavx!(C, A, B, α, β)# = zero(eltype(C)))
127-
@avx for m ∈ axes(A,1), n ∈ axes(B,2)
127+
@avx unroll=(1,1) for m ∈ axes(A,1), n ∈ axes(B,2)
128128
ΔCₘₙ = zero(eltype(C))
129129
for k ∈ axes(A,2)
130130
ΔCₘₙ += A[m,k] * B[k,n]
@@ -160,7 +160,7 @@
160160
end
161161

162162
function AmulB_avx1!(C, A, B)
163-
@_avx for m ∈ 1:size(A,1), n ∈ 1:size(B,2)
163+
@_avx unroll=(2,2) for m ∈ 1:size(A,1), n ∈ 1:size(B,2)
164164
Cₘₙ = zero(eltype(C))
165165
for k ∈ axes(A,2)
166166
Cₘₙ += A[m,k] * B[k,n]
@@ -182,7 +182,7 @@
182182
# A = rand(M, M); B = rand(M, M); C = similar(A);
183183
function AmulB_avx2!(C, A, B)
184184
z = zero(eltype(C))
185-
@_avx for m ∈ axes(A,1), n ∈ axes(B,2)
185+
@_avx unroll=(2,2) for m ∈ axes(A,1), n ∈ axes(B,2)
186186
C[m,n] = z
187187
for k ∈ axes(A,2)
188188
C[m,n] += A[m,k] * B[k,n]
@@ -201,7 +201,7 @@
201201
# ls.operations[1]
202202
function AmulB_avx3!(C, A, B)
203203
Kmin = firstindex(axes(A,2)); Kmax = lastindex(axes(A,2))
204-
@_avx for m ∈ axes(A,1), n ∈ axes(B,2)
204+
@_avx unroll=(2,2) for m ∈ axes(A,1), n ∈ axes(B,2)
205205
C[m,n] = zero(eltype(C))
206206
for k ∈ Kmin:Kmax
207207
C[m,n] += A[m,k] * B[k,n]
@@ -224,7 +224,7 @@
224224
# end)
225225
# ls = LoopVectorization.LoopSet(q);
226226
function AmuladdB_avx!(C, A, B, factor = 1)
227-
@_avx for m ∈ axes(A,1), n ∈ axes(B,2)
227+
@_avx unroll=(2,2) for m ∈ axes(A,1), n ∈ axes(B,2)
228228
ΔCₘₙ = zero(eltype(C))
229229
for k ∈ axes(A,2)
230230
ΔCₘₙ += A[m,k] * B[k,n]
@@ -293,7 +293,7 @@
293293
# LoopVectorization.loopdependencies.(operations(atls))
294294
# LoopVectorization.reduceddependencies.(operations(atls))
295295
function AtmulB_avx1!(C, A, B)
296-
@_avx for n ∈ axes(C,2), m ∈ axes(C,1)
296+
@_avx unroll=(2,2) for n ∈ axes(C,2), m ∈ axes(C,1)
297297
Cₘₙ = zero(eltype(C))
298298
for k ∈ axes(A,1)
299299
Cₘₙ += A[k,m] * B[k,n]
@@ -308,7 +308,7 @@
308308
@assert size(A, 1) == size(B, 1)
309309
# When the @avx macro is available, this code is faster:
310310
z = zero(eltype(C))
311-
@avx for n in axes(C,2), m in axes(C,1)
311+
@avx unroll=(2,2) for n in axes(C,2), m in axes(C,1)
312312
Cmn = z
313313
for k in axes(A,1)
314314
Cmn += A[k,m] * B[k,n]

0 commit comments

Comments
 (0)