Skip to content

Commit df8224a

Browse files
committed
Add mask register to register count for non-AVX512
1 parent e748186 commit df8224a

File tree

2 files changed

+4
-1
lines changed

2 files changed

+4
-1
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.9.9"
4+
version = "0.9.10"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"

src/determinestrategy.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,7 @@ function solve_unroll_iter(X, R, u₁L, u₂L, u₁range, u₂range)
372372
RR ≥ u₁temp*u₂temp*R₁ + u₁temp*R₂ + u₂temp*R₅ || continue
373373
tempcost = unroll_cost(X, u₁temp, u₂temp, u₁L, u₂L)
374374
# @show u₁temp, u₂temp, tempcost
375+
# @show u₁temp*u₂temp*R₁ + u₁temp*R₂ + u₂temp*R₅
375376
if tempcost ≤ bestcost
376377
bestcost = tempcost
377378
u₁best, u₂best = u₁temp, u₂temp
@@ -383,6 +384,8 @@ end
383384

384385
function solve_unroll(X, R, u₁L, u₂L, u₁step, u₂step)
385386
X₁, X₂, X₃, X₄ = X[1], X[2], X[3], X[4]
387+
# If we don't have AVX512, masks occupy a vector register
388+
VectorizationBase.AVX512F || (R[3] += 1)
386389
R₁, R₂, R₃, R₄, R₅ = R[1], R[2], R[3], R[4], R[5]
387390
iszero(R₅) || return solve_unroll_iter(X, R, u₁L, u₂L, u₁step:u₁step:10, u₂step:u₂step:10)
388391
RR = REGISTER_COUNT - R₃ - R₄

0 commit comments

Comments
 (0)