1818
1919
2020@time @testset " LoopVectorization.jl" begin
21-
22-
# Quote a self-dot-product loop (`s += a[i] * a[i]`) so that it can be handed to
# LoopVectorization as an expression rather than executed.
21+ selfdotq = :(for i ∈ eachindex (a)
22+ s += a[i]* a[i]
23+ end )
# Build the LoopSet representation of the quoted loop.
24+ lsselfdot = LoopVectorization. LoopSet (selfdotq);
# Print the LoopSet's operations into an in-memory buffer and capture the text,
# so the assertions below can inspect the printed form.
25+ io = IOBuffer ();
26+ println (io, LoopVectorization. operations (lsselfdot))
27+ s = String (take! (io))
# The printed operations are expected to be shown as an `Operation[...]` vector,
# to contain the zero-initialisation of the accumulator `s`, and to express the
# multiply-accumulate as a `LoopVectorization.vfmadd` call.
# NOTE(review): the exact spacing inside these string literals matters to
# `occursin`; confirm it matches what `operations` actually prints.
28+ @test occursin (" Operation[var\" " , s)
29+ @test occursin (" s = 0" , s)
30+ @test occursin (" s = LoopVectorization.vfmadd" , s)
31+
2332@time @testset " dot" begin
2433 dotq = :(for i ∈ eachindex (a,b)
2534 s += a[i]* b[i]
@@ -1268,6 +1277,35 @@ end
12681277 end
12691278 end
12701279
# Reference (non-LoopVectorization) kernel for a conditional-store matrix product.
#
# For every entry `C[m,n]` that is strictly positive on entry, overwrite it with
# `sum(A[k,m] * B[k,n] for k in 1:size(A,1))`, i.e. the (m,n) entry of
# `transpose(A) * B`. Entries of `C` that are zero or negative are left untouched.
#
# Bounds checks are disabled with `@inbounds`, so the caller must pass arrays for
# which every `A[k,m]` and `B[k,n]` access is valid:
# `size(A,2) >= size(C,1)`, `size(B,2) >= size(C,2)` and `size(B,1) >= size(A,1)`.
#
# Returns the mutated `C`.
function AtmulBpos!(C, A, B)
    @inbounds for n ∈ 1:size(C, 2), m ∈ 1:size(C, 1)
        # Accumulate in the element type of C so the sum is type-stable.
        Cₘₙ = zero(eltype(C))
        @simd ivdep for k ∈ 1:size(A, 1)
            Cₘₙ += A[k, m] * B[k, n]
        end
        # Conditional store: the product is always computed, but only written
        # back where the existing entry is strictly positive.
        C[m, n] > 0 && (C[m, n] = Cₘₙ)
    end
    return C
end
# `@avx` counterpart of the conditional-store matrix product.
#
# For every entry `C[m,n]` that is strictly positive on entry, overwrite it with
# `sum(A[k,m] * B[k,n] for k in 1:size(A,1))`; other entries are left untouched.
# The whole loop nest, including the conditional store, is handed to the `@avx`
# macro (presumably LoopVectorization's — the import is outside this view).
#
# Returns the mutated `C`.
function AtmulBposavx!(C, A, B)
    @avx for n ∈ 1:size(C, 2), m ∈ 1:size(C, 1)
        Cₘₙ = zero(eltype(C))
        for k ∈ 1:size(A, 1)
            Cₘₙ += A[k, m] * B[k, n]
        end
        # Conditional store: only write where the existing entry is > 0.
        C[m, n] > 0 && (C[m, n] = Cₘₙ)
    end
    return C
end
# `@_avx` counterpart of the conditional-store matrix product.
#
# For every entry `C[m,n]` that is strictly positive on entry, overwrite it with
# `sum(A[k,m] * B[k,n] for k in 1:size(A,1))`; other entries are left untouched.
# The loop nest is identical to the `@avx` variant; only the macro differs.
# NOTE(review): `@_avx` is defined outside this view — confirm which code path
# of the package it is meant to exercise.
#
# Returns the mutated `C`.
function AtmulBpos_avx!(C, A, B)
    @_avx for n ∈ 1:size(C, 2), m ∈ 1:size(C, 1)
        Cₘₙ = zero(eltype(C))
        for k ∈ 1:size(A, 1)
            Cₘₙ += A[k, m] * B[k, n]
        end
        # Conditional store: only write where the existing entry is > 0.
        C[m, n] > 0 && (C[m, n] = Cₘₙ)
    end
    return C
end
1307+
1308+
12711309 N = 117
12721310 for T ∈ (Float32, Float64, Int32, Int64)
12731311 if T <: Integer
@@ -1293,6 +1331,23 @@ end
12931331 @test c1 ≈ c2
12941332 fill! (c2, - 999999999 ); maybewriteoravx! (c2, a, b)
12951333 @test c1 ≈ c2
1334+
# Conditional-store matrix product: compare the plain-Julia reference kernel
# against the `@avx` and `@_avx` kernels on identical inputs.
#
# The sizes are bound with `let` so that `N` here does not overwrite the
# enclosing `N` (the vector length used by the earlier tests in this loop);
# a plain `M, K, N = ...` assignment would change that length for every
# subsequent element type `T`.
let M = 83, K = 85, N = 79
    if T <: Integer
        A = rand(T(-100):T(100), K, M)
        B = rand(T(-100):T(100), K, N)
        C1 = rand(T(-100):T(100), M, N)
    else
        A = randn(T, K, M)
        B = randn(T, K, N)
        C1 = randn(T, M, N)
    end
    # All three kernels start from the same C, because which entries get
    # overwritten depends on the sign of the existing values.
    C2 = copy(C1)
    C3 = copy(C1)
    AtmulBpos!(C1, A, B)
    AtmulBposavx!(C2, A, B)
    AtmulBpos_avx!(C3, A, B)
    @test C1 ≈ C2
    @test C1 ≈ C3
end
12961351 end
12971352end
12981353
@@ -1387,6 +1442,15 @@ end
13871442 end
13881443 end
13891444 end
# Return the additive identity of the element type of `A`, i.e. `zero(eltype(A))`.
# The only visible uses are in the commented-out `AmulBavx4!`-style kernels,
# which call it as `myzero(C)` to initialise `C[m,n]`.
myzero(A) = zero(eltype(A))
1446+ # function AmulBavx4!(C, A, B)
1447+ # @avx for m ∈ 1:size(A,1), n ∈ 1:size(B,2)
1448+ # C[m,n] = myzero(C)
1449+ # for k ∈ 1:size(A,2)
1450+ # C[m,n] += A[m,k] * B[k,n]
1451+ # end
1452+ # end
1453+ # end
13901454 function AmuladdBavx! (C, A, B, factor = 1 )
13911455 @avx for m ∈ 1 : size (A,1 ), n ∈ 1 : size (B,2 )
13921456 ΔCₘₙ = zero (eltype (C))
@@ -1457,6 +1521,21 @@ end
14571521 end
14581522 end
14591523 end
1524+ # function AmulB_avx4!(C, A, B)
1525+ # @_avx for m ∈ 1:size(A,1), n ∈ 1:size(B,2)
1526+ # C[m,n] = myzero(C)
1527+ # for k ∈ 1:size(A,2)
1528+ # C[m,n] += A[m,k] * B[k,n]
1529+ # end
1530+ # end
1531+ # end
1532+ # q = :(for m ∈ 1:size(A,1), n ∈ 1:size(B,2)
1533+ # C[m,n] = myzero(C)
1534+ # for k ∈ 1:size(A,2)
1535+ # C[m,n] += A[m,k] * B[k,n]
1536+ # end
1537+ # end)
1538+ # ls = LoopVectorization.LoopSet(q);
14601539 function AmuladdB_avx! (C, A, B, factor = 1 )
14611540 @_avx for m ∈ 1 : size (A,1 ), n ∈ 1 : size (B,2 )
14621541 ΔCₘₙ = zero (eltype (C))
0 commit comments