@@ -36,17 +36,17 @@ using LinearAlgebra
36
36
@test logsumexp! (r, x) ≈ 102.35216846104409
37
37
38
38
@testset " GEMM" begin
39
- AmulBq = :(for i ∈ 1 : size (A,1 ), j ∈ 1 : size (B,2 )
40
- Cᵢⱼ = zero (eltype (C))
41
- for k ∈ 1 : size (A,2 )
42
- Cᵢⱼ += A[i ,k] * B[k,j ]
43
- end
44
- C[i,j ] = Cᵢⱼ
45
- end )
46
-
39
+ AmulBq = :(for m ∈ 1 : size (A,1 ), n ∈ 1 : size (B,2 )
40
+ Cₘₙ = zero (eltype (C))
41
+ for k ∈ 1 : size (A,2 )
42
+ Cₘₙ += A[m ,k] * B[k,n ]
43
+ end
44
+ C[m,n ] = Cₘₙ
45
+ end )
46
+
47
47
lsAmulB = LoopVectorization. LoopSet (AmulBq);
48
- U, T = LoopVectorization. VectorizationBase. REGISTER_COUNT == 16 ? (3 ,4 ) : (6 , 4 )
49
- @test LoopVectorization. choose_order (lsAmulB) == (Symbol[:j , :i ,:k ], :i , U, T)
48
+ U, T = LoopVectorization. VectorizationBase. REGISTER_COUNT == 16 ? (3 ,4 ) : (4 , 4 )
49
+ @test LoopVectorization. choose_order (lsAmulB) == (Symbol[:n , :m ,:k ], :m , U, T)
50
50
51
51
function AmulB! (C, A, B)
52
52
C .= 0
@@ -57,12 +57,12 @@ using LinearAlgebra
57
57
end
58
58
end
59
59
function AmulBavx! (C, A, B)
60
- @avx for i ∈ 1 : size (A,1 ), j ∈ 1 : size (B,2 )
61
- Cᵢⱼ = zero (eltype (C))
60
+ @avx for m ∈ 1 : size (A,1 ), n ∈ 1 : size (B,2 )
61
+ Cₘₙ = zero (eltype (C))
62
62
for k ∈ 1 : size (A,2 )
63
- Cᵢⱼ += A[i ,k] * B[k,j ]
63
+ Cₘₙ += A[m ,k] * B[k,n ]
64
64
end
65
- C[i,j ] = Cᵢⱼ
65
+ C[m,n ] = Cₘₙ
66
66
end
67
67
end
68
68
@@ -75,43 +75,42 @@ using LinearAlgebra
75
75
# C[i,j] = Cᵢⱼ
76
76
# end
77
77
# end
78
- AtmulBq = :(for j ∈ 1 : size (C,2 ), i ∈ 1 : size (C,1 )
79
- Cᵢⱼ = zero (eltype (C))
78
+ AtmulBq = :(for n ∈ 1 : size (C,2 ), m ∈ 1 : size (C,1 )
79
+ Cₘₙ = zero (eltype (C))
80
80
for k ∈ 1 : size (A,1 )
81
- Cᵢⱼ += A[k,i ] * B[k,j ]
81
+ Cₘₙ += A[k,m ] * B[k,n ]
82
82
end
83
- C[i,j ] = Cᵢⱼ
83
+ C[m,n ] = Cₘₙ
84
84
end )
85
85
lsAtmulB = LoopVectorization. LoopSet (AtmulBq);
86
86
# LoopVectorization.choose_order(lsAtmulB)
87
- @test LoopVectorization. choose_order (lsAtmulB) == (Symbol[:j , :i ,:k ], :k , U, T)
87
+ @test LoopVectorization. choose_order (lsAtmulB) == (Symbol[:m , :n ,:k ], :k , U, T)
88
88
89
89
function AtmulBavx! (C, A, B)
90
- @avx for j ∈ 1 : size (C,2 ), i ∈ 1 : size (C,1 )
91
- Cᵢⱼ = zero (eltype (C))
90
+ @avx for n ∈ 1 : size (C,2 ), m ∈ 1 : size (C,1 )
91
+ Cₘₙ = zero (eltype (C))
92
92
for k ∈ 1 : size (A,1 )
93
- Cᵢⱼ += A[k,i ] * B[k,j ]
93
+ Cₘₙ += A[k,m ] * B[k,n ]
94
94
end
95
- C[i,j ] = Cᵢⱼ
95
+ C[m,n ] = Cₘₙ
96
96
end
97
- end
98
-
97
+ end
99
98
function rank2AmulB! (C, Aₘ, Aₖ, B)
100
- @inbounds for i ∈ 1 : size (C,1 ), j ∈ 1 : size (C,2 )
101
- Cᵢⱼ = zero (eltype (C))
99
+ @inbounds for m ∈ 1 : size (C,1 ), n ∈ 1 : size (C,2 )
100
+ Cₘₙ = zero (eltype (C))
102
101
@fastmath for k ∈ 1 : size (B,1 )
103
- Cᵢⱼ += (Aₘ[i ,1 ]* Aₖ[1 ,k]+ Aₘ[i ,2 ]* Aₖ[2 ,k]) * B[k,j ]
102
+ Cₘₙ += (Aₘ[m ,1 ]* Aₖ[1 ,k]+ Aₘ[m ,2 ]* Aₖ[2 ,k]) * B[k,n ]
104
103
end
105
- C[i,j ] = Cᵢⱼ
104
+ C[m,n ] = Cₘₙ
106
105
end
107
106
end
108
107
function rank2AmulBavx! (C, Aₘ, Aₖ, B)
109
- @avx for i ∈ 1 : size (C,1 ), j ∈ 1 : size (C,2 )
110
- Cᵢⱼ = zero (eltype (C))
108
+ @avx for m ∈ 1 : size (C,1 ), n ∈ 1 : size (C,2 )
109
+ Cₘₙ = zero (eltype (C))
111
110
for k ∈ 1 : size (B,1 )
112
- Cᵢⱼ += (Aₘ[i ,1 ]* Aₖ[1 ,k]+ Aₘ[i ,2 ]* Aₖ[2 ,k]) * B[k,j ]
111
+ Cₘₙ += (Aₘ[m ,1 ]* Aₖ[1 ,k]+ Aₘ[m ,2 ]* Aₖ[2 ,k]) * B[k,n ]
113
112
end
114
- C[i,j ] = Cᵢⱼ
113
+ C[m,n ] = Cₘₙ
115
114
end
116
115
end
117
116
0 commit comments