Skip to content

Commit d3a23f5

Browse files
committed
Updated tests.
1 parent b5aca04 commit d3a23f5

File tree

2 files changed

+33
-34
lines changed

2 files changed

+33
-34
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.2.3"
4+
version = "0.2.4"
55

66
[deps]
77
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"

test/runtests.jl

Lines changed: 32 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -36,17 +36,17 @@ using LinearAlgebra
3636
@test logsumexp!(r, x) ≈ 102.35216846104409
3737

3838
@testset "GEMM" begin
39-
AmulBq = :(for i ∈ 1:size(A,1), j ∈ 1:size(B,2)
40-
Cᵢⱼ = zero(eltype(C))
41-
for k ∈ 1:size(A,2)
42-
Cᵢⱼ += A[i,k] * B[k,j]
43-
end
44-
C[i,j] = Cᵢⱼ
45-
end)
46-
39+
AmulBq = :(for m ∈ 1:size(A,1), n ∈ 1:size(B,2)
40+
Cₘₙ = zero(eltype(C))
41+
for k ∈ 1:size(A,2)
42+
Cₘₙ += A[m,k] * B[k,n]
43+
end
44+
C[m,n] = Cₘₙ
45+
end)
46+
4747
lsAmulB = LoopVectorization.LoopSet(AmulBq);
48-
U, T = LoopVectorization.VectorizationBase.REGISTER_COUNT == 16 ? (3,4) : (6, 4)
49-
@test LoopVectorization.choose_order(lsAmulB) == (Symbol[:j,:i,:k], :i, U, T)
48+
U, T = LoopVectorization.VectorizationBase.REGISTER_COUNT == 16 ? (3,4) : (4, 4)
49+
@test LoopVectorization.choose_order(lsAmulB) == (Symbol[:n,:m,:k], :m, U, T)
5050

5151
function AmulB!(C, A, B)
5252
C .= 0
@@ -57,12 +57,12 @@ using LinearAlgebra
5757
end
5858
end
5959
function AmulBavx!(C, A, B)
60-
@avx for i ∈ 1:size(A,1), j ∈ 1:size(B,2)
61-
Cᵢⱼ = zero(eltype(C))
60+
@avx for m ∈ 1:size(A,1), n ∈ 1:size(B,2)
61+
Cₘₙ = zero(eltype(C))
6262
for k ∈ 1:size(A,2)
63-
Cᵢⱼ += A[i,k] * B[k,j]
63+
Cₘₙ += A[m,k] * B[k,n]
6464
end
65-
C[i,j] = Cᵢⱼ
65+
C[m,n] = Cₘₙ
6666
end
6767
end
6868

@@ -75,43 +75,42 @@ using LinearAlgebra
7575
# C[i,j] = Cᵢⱼ
7676
# end
7777
# end
78-
AtmulBq = :(for j ∈ 1:size(C,2), i ∈ 1:size(C,1)
79-
Cᵢⱼ = zero(eltype(C))
78+
AtmulBq = :(for n ∈ 1:size(C,2), m ∈ 1:size(C,1)
79+
Cₘₙ = zero(eltype(C))
8080
for k ∈ 1:size(A,1)
81-
Cᵢⱼ += A[k,i] * B[k,j]
81+
Cₘₙ += A[k,m] * B[k,n]
8282
end
83-
C[i,j] = Cᵢⱼ
83+
C[m,n] = Cₘₙ
8484
end)
8585
lsAtmulB = LoopVectorization.LoopSet(AtmulBq);
8686
# LoopVectorization.choose_order(lsAtmulB)
87-
@test LoopVectorization.choose_order(lsAtmulB) == (Symbol[:j,:i,:k], :k, U, T)
87+
@test LoopVectorization.choose_order(lsAtmulB) == (Symbol[:m,:n,:k], :k, U, T)
8888

8989
function AtmulBavx!(C, A, B)
90-
@avx for j ∈ 1:size(C,2), i ∈ 1:size(C,1)
91-
Cᵢⱼ = zero(eltype(C))
90+
@avx for n ∈ 1:size(C,2), m ∈ 1:size(C,1)
91+
Cₘₙ = zero(eltype(C))
9292
for k ∈ 1:size(A,1)
93-
Cᵢⱼ += A[k,i] * B[k,j]
93+
Cₘₙ += A[k,m] * B[k,n]
9494
end
95-
C[i,j] = Cᵢⱼ
95+
C[m,n] = Cₘₙ
9696
end
97-
end
98-
97+
end
9998
function rank2AmulB!(C, Aₘ, Aₖ, B)
100-
@inbounds for i ∈ 1:size(C,1), j ∈ 1:size(C,2)
101-
Cᵢⱼ = zero(eltype(C))
99+
@inbounds for m ∈ 1:size(C,1), n ∈ 1:size(C,2)
100+
Cₘₙ = zero(eltype(C))
102101
@fastmath for k ∈ 1:size(B,1)
103-
Cᵢⱼ += (Aₘ[i,1]*Aₖ[1,k]+Aₘ[i,2]*Aₖ[2,k]) * B[k,j]
102+
Cₘₙ += (Aₘ[m,1]*Aₖ[1,k]+Aₘ[m,2]*Aₖ[2,k]) * B[k,n]
104103
end
105-
C[i,j] = Cᵢⱼ
104+
C[m,n] = Cₘₙ
106105
end
107106
end
108107
function rank2AmulBavx!(C, Aₘ, Aₖ, B)
109-
@avx for i ∈ 1:size(C,1), j ∈ 1:size(C,2)
110-
Cᵢⱼ = zero(eltype(C))
108+
@avx for m ∈ 1:size(C,1), n ∈ 1:size(C,2)
109+
Cₘₙ = zero(eltype(C))
111110
for k ∈ 1:size(B,1)
112-
Cᵢⱼ += (Aₘ[i,1]*Aₖ[1,k]+Aₘ[i,2]*Aₖ[2,k]) * B[k,j]
111+
Cₘₙ += (Aₘ[m,1]*Aₖ[1,k]+Aₘ[m,2]*Aₖ[2,k]) * B[k,n]
113112
end
114-
C[i,j] = Cᵢⱼ
113+
C[m,n] = Cₘₙ
115114
end
116115
end
117116

0 commit comments

Comments
 (0)