Skip to content

Commit 982ce35

Browse files
committed
Update chooseorder tests for non-AVX512F
1 parent 61249dd commit 982ce35

File tree

2 files changed

+6
-4
lines changed

2 files changed

+6
-4
lines changed

src/determinestrategy.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -384,8 +384,10 @@ end
384384

385385
function solve_unroll(X, R, u₁L, u₂L, u₁step, u₂step)
386386
X₁, X₂, X₃, X₄ = X[1], X[2], X[3], X[4]
387-
# If we don't have AVX512, masks occupy a vector register
388-
VectorizationBase.AVX512F || (R[3] += 1)
387+
# If we don't have AVX512, masks occupy a vector register;
388+
# AVX512F is currently defined as `false` for non-x86 CPUs, but
389+
# VectorizationBase.jl should instead define a generic constant `HAS_OPMASK_REGISTERS` for use here.
390+
AVX512F || (R[3] += 1)
389391
R₁, R₂, R₃, R₄, R₅ = R[1], R[2], R[3], R[4], R[5]
390392
iszero(R₅) || return solve_unroll_iter(X, R, u₁L, u₂L, u₁step:u₁step:10, u₂step:u₂step:10)
391393
RR = REGISTER_COUNT - R₃ - R₄

test/gemm.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
@testset "GEMM" begin
22
# using LoopVectorization, LinearAlgebra, Test; T = Float64
33
Unum, Tnum = LoopVectorization.REGISTER_COUNT == 16 ? (2, 6) : (3, 9)
4-
Unumt, Tnumt = LoopVectorization.REGISTER_COUNT == 16 ? (3, 4) : (5, 5)
4+
Unumt, Tnumt = LoopVectorization.REGISTER_COUNT == 16 ? (2, 6) : (5, 5)
55
if LoopVectorization.REGISTER_COUNT != 8
66
@test LoopVectorization.mᵣ == Unum
77
@test LoopVectorization.nᵣ == Tnum
@@ -353,7 +353,7 @@
353353
if LoopVectorization.REGISTER_COUNT == 32
354354
@test LoopVectorization.choose_order(lsr2amb) == ([:n, :m, :k], :m, :n, :m, 3, 7)
355355
elseif LoopVectorization.REGISTER_COUNT == 16
356-
@test LoopVectorization.choose_order(lsr2amb) == ([:m, :n, :k], :n, :m, :m, 4, 2)
356+
@test LoopVectorization.choose_order(lsr2amb) == ([:m, :n, :k], :m, :n, :m, 1, 6)
357357
end
358358
function rank2AmulBavx!(C, Aₘ, Aₖ, B)
359359
@avx for m ∈ axes(C,1), n ∈ axes(C,2)

0 commit comments

Comments
 (0)