Skip to content

Commit 0c96b40

Browse files
committed
Added tests to try and catch noinline subsetviews.
1 parent 0c9d6b0 commit 0c96b40

File tree

1 file changed

+75
-0
lines changed

1 file changed

+75
-0
lines changed

test/gemm.jl

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,15 @@
290290
C[m,n] = Cₘₙ
291291
end
292292
end
293+
"""
    rank2AmulBavx_noinline!(C, Aₘ, Aₖ, B)

Overwrite every entry of `C` with

    C[m,n] = Σₖ (Aₘ[m,1]*Aₖ[1,k] + Aₘ[m,2]*Aₖ[2,k]) * B[k,n]

where `k` runs over `1:size(B,1)`. Only columns 1 and 2 of `Aₘ` and rows 1 and 2
of `Aₖ` are read, so the product `Aₘ*Aₖ` is never materialised.

The loop nest is handed to `@avx` with `inline=false`.
NOTE(review): presumably this exercises the non-inlined kernel path of the
macro (the commit adding it mentions "noinline subsetviews") — confirm against
the macro's documentation.
"""
function rank2AmulBavx_noinline!(C, Aₘ, Aₖ, B)
    @avx inline=false for m ∈ 1:size(C,1), n ∈ 1:size(C,2)
        # Accumulator typed after the destination so the reduction stays in eltype(C).
        Cₘₙ = zero(eltype(C))
        for k ∈ 1:size(B,1)
            Cₘₙ += (Aₘ[m,1]*Aₖ[1,k]+Aₘ[m,2]*Aₖ[2,k]) * B[k,n]
        end
        C[m,n] = Cₘₙ
    end
end
293302

294303
function mulCAtB_2x2blockavx!(C, A, B)
295304
M, N = size(C); K = size(B,1)
@@ -399,6 +408,60 @@
399408
end
400409
return C
401410
end
411+
"""
    mulCAtB_2x2blockavx_noinline!(C, A, B)

Overwrite `C` with `C[m,n] = Σₖ A[k,m] * B[k,n]` (for `k` in `1:size(B,1)`) and
return `C`.

`C` is filled in 2×2 tiles: four accumulators are reduced over `k` in a single
`@avx inline=false` loop and then stored. When `size(C,2)` is odd the last
column is handled by a 2×1 kernel, when `size(C,1)` is odd the last row is
handled by a 1×2 kernel, and when both are odd the final corner entry is
computed on its own.

All accumulators start at `zero(eltype(C))`.

The sizes are checked with `@assert`:
`size(C,1) == size(A,2)`, `size(C,2) == size(B,2)` and `size(A,1) == size(B,1)`.
"""
function mulCAtB_2x2blockavx_noinline!(C, A, B)
    M, N = size(C); K = size(B,1)
    @assert size(C, 1) == size(A, 2)
    @assert size(C, 2) == size(B, 2)
    @assert size(A, 1) == size(B, 1)
    T = eltype(C)
    # `M & -2` clears the lowest bit, i.e. rounds M down to an even number, so
    # the stride-2 loops only visit complete pairs (m, m+1) / (n, n+1).
    for m ∈ 1:2:(M & -2)
        m1 = m + 1
        for n ∈ 1:2:(N & -2)
            n1 = n + 1
            C11, C21, C12, C22 = zero(T), zero(T), zero(T), zero(T)
            @avx inline=false for k ∈ 1:K
                C11 += A[k,m] * B[k,n]
                C21 += A[k,m1] * B[k,n]
                C12 += A[k,m] * B[k,n1]
                C22 += A[k,m1] * B[k,n1]
            end
            C[m,n] = C11
            C[m1,n] = C21
            C[m,n1] = C12
            C[m1,n1] = C22
        end
        if isodd(N)
            # Leftover last column for this pair of rows.
            C1n = zero(T)
            C2n = zero(T)
            @avx inline=false for k ∈ 1:K
                C1n += A[k,m] * B[k,N]
                C2n += A[k,m1] * B[k,N]
            end
            C[m,N] = C1n
            C[m1,N] = C2n
        end
    end
    if isodd(M)
        # Leftover last row, processed two columns at a time.
        for n ∈ 1:2:(N & -2)
            n1 = n + 1
            Cm1, Cm2 = zero(T), zero(T)
            @avx inline=false for k ∈ 1:K
                Cm1 += A[k,M] * B[k,n]
                Cm2 += A[k,M] * B[k,n1]
            end
            C[M,n] = Cm1
            C[M,n1] = Cm2
        end
        if isodd(N)
            # Both dimensions odd: the bottom-right corner entry remains.
            Cmn = zero(T)
            @avx inline=false for k ∈ 1:K
                Cmn += A[k,M] * B[k,N]
            end
            C[M,N] = Cmn
        end
    end
    return C
end
402465
# M = 77;
403466
# A = rand(M,M); B = rand(M,M); C = similar(A);
404467
# mulCAtB_2x2block_avx!(C,A,B)
@@ -481,6 +544,10 @@
481544
@test C C2
482545
fill!(C, 9999.999); mulCAtB_2x2blockavx!(C, A', B);
483546
@test C C2
547+
fill!(C, 9999.999); mulCAtB_2x2blockavx_noinline!(C, At, B);
548+
@test C C2
549+
fill!(C, 9999.999); mulCAtB_2x2blockavx_noinline!(C, A', B);
550+
@test C C2
484551
end
485552
@time @testset "_avx $T dynamic gemm" begin
486553
AmulB_avx1!(C, A, B)
@@ -549,6 +616,10 @@
549616
@test Cs C2
550617
fill!(Cs, 9999.999); mulCAtB_2x2blockavx!(Cs, As', Bs);
551618
@test Cs C2
619+
fill!(Cs, 9999.999); mulCAtB_2x2blockavx_noinline!(Cs, Ats, Bs);
620+
@test Cs C2
621+
fill!(Cs, 9999.999); mulCAtB_2x2blockavx_noinline!(Cs, As', Bs);
622+
@test Cs C2
552623
end
553624
@time @testset "_avx $T static gemm" begin
554625
AmulB_avx1!(Cs, As, Bs)
@@ -593,10 +664,14 @@
593664
@test C C2
594665
fill!(C, 9999.999); rank2AmulB_avx!(C, Aₘ, Aₖ, B)
595666
@test C C2
667+
fill!(C, 9999.999); rank2AmulBavx_noinline!(C, Aₘ, Aₖ, B)
668+
@test C C2
596669
fill!(C, 9999.999); rank2AmulBavx!(C, Aₘ, Aₖ′', B)
597670
@test C C2
598671
fill!(C, 9999.999); rank2AmulB_avx!(C, Aₘ, Aₖ′', B)
599672
@test C C2
673+
fill!(C, 9999.999); rank2AmulBavx_noinline!(C, Aₘ, Aₖ′', B)
674+
@test C C2
600675
end
601676

602677
end

0 commit comments

Comments
 (0)