@@ -15,11 +15,11 @@ using StaticArrays, LoopVectorization
15
15
16
16
@inline function AmulB! (C, A, B)
17
17
@avx for n ∈ axes (C,2 ), m ∈ axes (C,1 )
18
- C_m_n = zero (eltype (C))
18
+ Cₘₙ = zero (eltype (C))
19
19
for k ∈ axes (B,1 )
20
- C_m_n += A[m,k] * B[k,n]
20
+ Cₘₙ += A[m,k] * B[k,n]
21
21
end
22
- C[m,n] = C_m_n
22
+ C[m,n] = Cₘₙ
23
23
end
24
24
C
25
25
end
@@ -41,19 +41,19 @@ function runbenches(sr, ::Type{T}, fa = identity, fb = identity) where {T}
41
41
bench_results = Matrix {Float64} (undef, length (sr), 4 );
42
42
for (i,s) ∈ enumerate (sr)
43
43
M, K, N = matdims (s)
44
- A_m = @MMatrix rand (T, M, K)
45
- B_m = @MMatrix rand (T, K, N)
46
- A_s = Ref (SMatrix (A_m ));
47
- B_s = Ref (SMatrix (B_m ));
48
- C_s_s = fa (A_s []) * fb (B_s []);
49
- C_s_l = AmulB (fa (A_s []), fb (B_s []))
50
- C_m_s = similar (C_s_s ); mul! (C_m_s , fa (A_m ), fb (B_m ));
51
- C_m_l = similar (C_s_s ); AmulB! (C_m_l , fa (A_m ), fb (B_m ));
52
- @assert Array (C_s_s ) ≈ Array (C_s_l ) ≈ Array (C_m_s ) ≈ Array (C_m_l ) # Once upon a time Julia crashed on ≈ for large static arrays
53
- bench_results[i,1 ] = @belapsed $ fa ($ A_s []) * $ fb ($ B_s [])
54
- bench_results[i,2 ] = @belapsed AmulB ($ fa ($ A_s []), $ fb ($ B_s []))
55
- bench_results[i,3 ] = @belapsed mul! ($ C_m_s , $ fa ($ A_m ), $ fb ($ B_m ))
56
- bench_results[i,4 ] = @belapsed AmulB! ($ C_m_l , $ fa ($ A_m ), $ fb ($ B_m ))
44
+ Aₘ = @MMatrix rand (T, M, K)
45
+ Bₘ = @MMatrix rand (T, K, N)
46
+ Aₛ = Ref (SMatrix (Aₘ ));
47
+ Bₛ = Ref (SMatrix (Bₘ ));
48
+ Cₛₛ = fa (Aₛ []) * fb (Bₛ []);
49
+ Cₛₗ = AmulB (fa (Aₛ []), fb (Bₛ []))
50
+ Cₘₛ = similar (Cₛₛ ); mul! (Cₘₛ , fa (Aₘ ), fb (Bₘ ));
51
+ Cₘₗ = similar (Cₛₛ ); AmulB! (Cₘₗ , fa (Aₘ ), fb (Bₘ ));
52
+ @assert Array (Cₛₛ ) ≈ Array (Cₛₗ ) ≈ Array (Cₘₛ ) ≈ Array (Cₘₗ ) # Once upon a time Julia crashed on ≈ for large static arrays
53
+ bench_results[i,1 ] = @belapsed $ fa ($ Aₛ []) * $ fb ($ Bₛ [])
54
+ bench_results[i,2 ] = @belapsed AmulB ($ fa ($ Aₛ []), $ fb ($ Bₛ []))
55
+ bench_results[i,3 ] = @belapsed mul! ($ Cₘₛ , $ fa ($ Aₘ ), $ fb ($ Bₘ ))
56
+ bench_results[i,4 ] = @belapsed AmulB! ($ Cₘₗ , $ fa ($ Aₘ ), $ fb ($ Bₘ ))
57
57
@show s, bench_results[i,:]
58
58
end
59
59
gflops = @. 1e-9 * matflop (sr) / bench_results
@@ -94,11 +94,11 @@ C_hybrid = HybridArray{Tuple{StaticArrays.Dynamic(),StaticArrays.Dynamic(),3,3}}
94
94
# B is K x N x L x J
95
95
function bmul! (C, A, B)
96
96
@avx for n in axes (C,2 ), m in axes (C,1 ), j in axes (C,4 ), i in axes (C,3 )
97
- C_m_n_j_i = zero (eltype (C))
97
+ Cₘₙⱼᵢ = zero (eltype (C))
98
98
for k in axes (B,1 ), l in axes (B,3 )
99
- C_m_n_j_i += A[m,k,i,l] * B[k,n,l,j]
99
+ Cₘₙⱼᵢ += A[m,k,i,l] * B[k,n,l,j]
100
100
end
101
- C[m,n,i,j] = C_m_n_j_i
101
+ C[m,n,i,j] = Cₘₙⱼᵢ
102
102
end
103
103
end
104
104
```
0 commit comments