@@ -19,19 +19,17 @@ using Test
19
19
dot3 (x, A, y) = dot (x, A, y)
20
20
end
21
21
"""
    dot3avx(x, A, y)

Return the sum over all `m`, `n` of `x[m] * A[m, n] * y[n]`, with the loop nest
wrapped in `@avx`.

The accumulator starts at `zero` of the promoted element types of `x`, `A` and
`y`. Both loop ranges are taken from `A` alone (`axes(A, 1)` and `axes(A, 2)`),
so `x` and `y` only need to support `eltype` and indexing at those positions.
"""
function dot3avx(x, A, y)
    # Promote up front so the accumulator does not change type inside the loop.
    s = zero(promote_type(eltype(x), eltype(A), eltype(y)))
    # Use A's own axes instead of 1:size(A, d): no unused M, N locals and no
    # assumption that A is indexed starting at 1.
    @avx for m ∈ axes(A, 1), n ∈ axes(A, 2)
        s += x[m] * A[m, n] * y[n]
    end
    s
end
29
28
function dot3v2avx (x, A, y)
30
- M, N = size (A)
31
29
s = zero (promote_type (eltype (x), eltype (A), eltype (y)))
32
- @avx for n ∈ 1 : N
30
+ @avx for n ∈ axes (A, 2 )
33
31
t = zero (s)
34
- for m ∈ 1 : M
32
+ for m ∈ axes (A, 1 )
35
33
t += x[m] * A[m,n]
36
34
end
37
35
s += t * y[n]
788
786
M, N = 47 , 73 ;
789
787
x = rand (T, M); A = rand (T, M, N); y = rand (T, N);
790
788
d3 = dot3 (x, A, y)
791
- @test dot3avx (x, A, y) ≈ d3
792
- @test dot3v2avx (x, A, y) ≈ d3
789
+ @test dot3avx (LoopVectorization. stridedpointer (x), A, y) ≈ d3
790
+ @test dot3v2avx (x, A, LoopVectorization. stridedpointer (y)) ≈ d3
791
+ @test dot3avx24 (x, A, y) ≈ d3
793
792
@test dot3_avx (x, A, y) ≈ d3
794
793
795
794
A2 = similar (A);
@@ -930,15 +929,15 @@ end
930
929
@test X1 ≈ X2
931
930
@test Y1 ≈ Y2
932
931
933
- # a_re, a_im = rand(T, 2, 2, 2), rand(T, 2, 2, 2);
934
- # b_re, b_im = rand(T, 2, 2), rand(T, 2, 2);
935
- # c_re_1 = ones(T, 2, 2); c_re_2 = ones(T, 2, 2);
936
- # multiple_unrolls_split_depchains!(c_re_1, a_re, b_re, a_im, b_im, true) # [1 1; 1 1]
937
- # multiple_unrolls_split_depchains_avx!(c_re_2, a_re, b_re, a_im, b_im, true) # [1 1; 1 1]
938
- # @test c_re_1 ≈ c_re_2
939
- # multiple_unrolls_split_depchains!(c_re_1, a_re, b_re, a_im, b_im) # [1 1; 1 1]
940
- # multiple_unrolls_split_depchains_avx!(c_re_2, a_re, b_re, a_im, b_im) # [1 1; 1 1]
941
- # @test c_re_1 ≈ c_re_2
932
+ a_re, a_im = rand (T, 2 , 2 , 2 ), rand (T, 2 , 2 , 2 );
933
+ b_re, b_im = rand (T, 2 , 2 ), rand (T, 2 , 2 );
934
+ c_re_1 = ones (T, 2 , 2 ); c_re_2 = ones (T, 2 , 2 );
935
+ multiple_unrolls_split_depchains! (c_re_1, a_re, b_re, a_im, b_im, true ) # [1 1; 1 1]
936
+ multiple_unrolls_split_depchains_avx! (c_re_2, a_re, b_re, a_im, b_im, true ) # [1 1; 1 1]
937
+ @test c_re_1 ≈ c_re_2
938
+ multiple_unrolls_split_depchains! (c_re_1, a_re, b_re, a_im, b_im) # [1 1; 1 1]
939
+ multiple_unrolls_split_depchains_avx! (c_re_2, a_re, b_re, a_im, b_im) # [1 1; 1 1]
940
+ @test c_re_1 ≈ c_re_2
942
941
943
942
@test loopinductvardivision (X1) ≈ loopinductvardivisionavx (X2)
944
943
0 commit comments