@@ -15,11 +15,11 @@ using StaticArrays, LoopVectorization
15
15
16
16
"""
    AmulB!(C, A, B)

Overwrite `C` with the matrix product of `A` and `B` and return `C`.

Each entry `C[m, n]` is accumulated in a local that starts at `zero(eltype(C))`,
summed over `k in axes(B, 1)`, and stored once the inner loop has finished. The
whole loop nest is handed to `@avx`. This function does no size validation of its
own, so the caller is responsible for passing conformable arrays.
"""
@inline function AmulB!(C, A, B)
    @avx for n in axes(C, 2), m in axes(C, 1)
        acc = zero(eltype(C))
        for k in axes(B, 1)
            acc += A[m, k] * B[k, n]
        end
        C[m, n] = acc
    end
    return C
end
@@ -41,19 +41,19 @@ function runbenches(sr, ::Type{T}, fa = identity, fb = identity) where {T}
41
41
bench_results = Matrix {Float64} (undef, length (sr), 4 );
42
42
for (i,s) ∈ enumerate (sr)
43
43
M, K, N = matdims (s)
44
- A_m = @MMatrix rand (T, M, K)
45
- B_m = @MMatrix rand (T, K, N)
46
- A_s = Ref (SMatrix (A_m ));
47
- B_s = Ref (SMatrix (B_m ));
48
- C_s_s = fa (A_s []) * fb (B_s []);
49
- C_s_l = AmulB (fa (A_s []), fb (B_s []))
50
- C_m_s = similar (C_s_s ); mul! (C_m_s , fa (A_m ), fb (B_m ));
51
- C_m_l = similar (C_s_s ); AmulB! (C_m_l , fa (A_m ), fb (B_m ));
52
- @assert Array (C_s_s ) ≈ Array (C_s_l ) ≈ Array (C_m_s ) ≈ Array (C_m_l ) # Once upon a time Julia crashed on ≈ for large static arrays
53
- bench_results[i,1 ] = @belapsed $ fa ($ A_s []) * $ fb ($ B_s [])
54
- bench_results[i,2 ] = @belapsed AmulB ($ fa ($ A_s []), $ fb ($ B_s []))
55
- bench_results[i,3 ] = @belapsed mul! ($ C_m_s , $ fa ($ A_m ), $ fb ($ B_m ))
56
- bench_results[i,4 ] = @belapsed AmulB! ($ C_m_l , $ fa ($ A_m ), $ fb ($ B_m ))
44
+ Aₘ = @MMatrix rand (T, M, K)
45
+ Bₘ = @MMatrix rand (T, K, N)
46
+ Aₛ = Ref (SMatrix (Aₘ ));
47
+ Bₛ = Ref (SMatrix (Bₘ ));
48
+ Cₛₛ = fa (Aₛ []) * fb (Bₛ []);
49
+ Cₛₗ = AmulB (fa (Aₛ []), fb (Bₛ []))
50
+ Cₘₛ = similar (Cₛₛ ); mul! (Cₘₛ , fa (Aₘ ), fb (Bₘ ));
51
+ Cₘₗ = similar (Cₛₛ ); AmulB! (Cₘₗ , fa (Aₘ ), fb (Bₘ ));
52
+ @assert Array (Cₛₛ ) ≈ Array (Cₛₗ ) ≈ Array (Cₘₛ ) ≈ Array (Cₘₗ ) # Once upon a time Julia crashed on ≈ for large static arrays
53
+ bench_results[i,1 ] = @belapsed $ fa ($ Aₛ []) * $ fb ($ Bₛ [])
54
+ bench_results[i,2 ] = @belapsed AmulB ($ fa ($ Aₛ []), $ fb ($ Bₛ []))
55
+ bench_results[i,3 ] = @belapsed mul! ($ Cₘₛ , $ fa ($ Aₘ ), $ fb ($ Bₘ ))
56
+ bench_results[i,4 ] = @belapsed AmulB! ($ Cₘₗ , $ fa ($ Aₘ ), $ fb ($ Bₘ ))
57
57
@show s, bench_results[i,:]
58
58
end
59
59
gflops = @. 1e-9 * matflop (sr) / bench_results
@@ -94,11 +94,11 @@ C_hybrid = HybridArray{Tuple{StaticArrays.Dynamic(),StaticArrays.Dynamic(),3,3}}
94
94
# B is K x N x L x J
95
95
"""
    bmul!(C, A, B)

Fill the four-dimensional array `C` with the contraction

    C[m, n, i, j] = sum over k, l of A[m, k, i, l] * B[k, n, l, j]

and return `C`.

The ranges of `m`, `n`, `i`, `j` come from `axes(C, 1)` … `axes(C, 4)`; the
contracted indices `k` and `l` run over `axes(B, 1)` and `axes(B, 3)`. This
function does no size validation of its own, so the caller is responsible for
passing arrays whose dimensions agree.
"""
function bmul!(C, A, B)
    @avx for n in axes(C, 2), m in axes(C, 1), j in axes(C, 4), i in axes(C, 3)
        Cₘₙⱼᵢ = zero(eltype(C))
        for k in axes(B, 1), l in axes(B, 3)
            Cₘₙⱼᵢ += A[m, k, i, l] * B[k, n, l, j]
        end
        C[m, n, i, j] = Cₘₙⱼᵢ
    end
    # Return the mutated array (as `AmulB!` does) instead of the `nothing`
    # produced by a trailing `for` loop, so calls can be chained or compared.
    return C
end
104
104
```
0 commit comments