@@ -15,11 +15,11 @@ using StaticArrays, LoopVectorization
15
15
16
16
@inline function AmulB! (C, A, B)
17
17
@avx for n ∈ axes (C,2 ), m ∈ axes (C,1 )
18
- Cₘₙ = zero (eltype (C))
18
+ Cmn = zero (eltype (C))
19
19
for k ∈ axes (B,1 )
20
- Cₘₙ += A[m,k] * B[k,n]
20
+ Cmn += A[m,k] * B[k,n]
21
21
end
22
- C[m,n] = Cₘₙ
22
+ C[m,n] = Cmn
23
23
end
24
24
C
25
25
end
@@ -41,19 +41,19 @@ function runbenches(sr, ::Type{T}, fa = identity, fb = identity) where {T}
41
41
bench_results = Matrix {Float64} (undef, length (sr), 4 );
42
42
for (i,s) ∈ enumerate (sr)
43
43
M, K, N = matdims (s)
44
- Aₘ = @MMatrix rand (T, M, K)
45
- Bₘ = @MMatrix rand (T, K, N)
46
- Aₛ = Ref (SMatrix (Aₘ ));
47
- Bₛ = Ref (SMatrix (Bₘ ));
48
- Cₛₛ = fa (Aₛ []) * fb (Bₛ []);
49
- Cₛₗ = AmulB (fa (Aₛ []), fb (Bₛ []))
50
- Cₘₛ = similar (Cₛₛ ); mul! (Cₘₛ , fa (Aₘ ), fb (Bₘ ));
51
- Cₘₗ = similar (Cₛₛ ); AmulB! (Cₘₗ , fa (Aₘ ), fb (Bₘ ));
52
- @assert Array (Cₛₛ ) ≈ Array (Cₛₗ ) ≈ Array (Cₘₛ ) ≈ Array (Cₘₗ ) # Once upon a time Julia crashed on ≈ for large static arrays
53
- bench_results[i,1 ] = @belapsed $ fa ($ Aₛ []) * $ fb ($ Bₛ [])
54
- bench_results[i,2 ] = @belapsed AmulB ($ fa ($ Aₛ []), $ fb ($ Bₛ []))
55
- bench_results[i,3 ] = @belapsed mul! ($ Cₘₛ , $ fa ($ Aₘ ), $ fb ($ Bₘ ))
56
- bench_results[i,4 ] = @belapsed AmulB! ($ Cₘₗ , $ fa ($ Aₘ ), $ fb ($ Bₘ ))
44
+ Am = @MMatrix rand (T, M, K)
45
+ Bm = @MMatrix rand (T, K, N)
46
+ As = Ref (SMatrix (Am ));
47
+ Bs = Ref (SMatrix (Bm ));
48
+ Css = fa (As []) * fb (Bs []);
49
+ Csl = AmulB (fa (As []), fb (Bs []))
50
+ Cms = similar (Css ); mul! (Cms , fa (Am ), fb (Bm ));
51
+ Cml = similar (Css ); AmulB! (Cml , fa (Am ), fb (Bm ));
52
+ @assert Array (Css ) ≈ Array (Csl ) ≈ Array (Cms ) ≈ Array (Cml ) # Once upon a time Julia crashed on ≈ for large static arrays
53
+ bench_results[i,1 ] = @belapsed $ fa ($ As []) * $ fb ($ Bs [])
54
+ bench_results[i,2 ] = @belapsed AmulB ($ fa ($ As []), $ fb ($ Bs []))
55
+ bench_results[i,3 ] = @belapsed mul! ($ Cms , $ fa ($ Am ), $ fb ($ Bm ))
56
+ bench_results[i,4 ] = @belapsed AmulB! ($ Cml , $ fa ($ Am ), $ fb ($ Bm ))
57
57
@show s, bench_results[i,:]
58
58
end
59
59
gflops = @. 1e-9 * matflop (sr) / bench_results
@@ -94,11 +94,11 @@ C_hybrid = HybridArray{Tuple{StaticArrays.Dynamic(),StaticArrays.Dynamic(),3,3}}
94
94
# B is K x N x L x J
95
95
function bmul! (C, A, B)
96
96
@avx for n in axes (C,2 ), m in axes (C,1 ), j in axes (C,4 ), i in axes (C,3 )
97
- Cₘₙⱼᵢ = zero (eltype (C))
97
+ Cmnji = zero (eltype (C))
98
98
for k in axes (B,1 ), l in axes (B,3 )
99
- Cₘₙⱼᵢ += A[m,k,i,l] * B[k,n,l,j]
99
+ Cmnji += A[m,k,i,l] * B[k,n,l,j]
100
100
end
101
- C[m,n,i,j] = Cₘₙⱼᵢ
101
+ C[m,n,i,j] = Cmnji
102
102
end
103
103
end
104
104
```
0 commit comments