We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
2 parents: 9894290 + df8224a · commit 61249dd · Copy full SHA for 61249dd
Project.toml
@@ -1,7 +1,7 @@
1
name = "LoopVectorization"
2
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
3
authors = ["Chris Elrod <[email protected]>"]
4
-version = "0.9.9"
+version = "0.9.10"
5
6
[deps]
7
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
src/determinestrategy.jl
@@ -372,6 +372,7 @@ function solve_unroll_iter(X, R, u₁L, u₂L, u₁range, u₂range)
372
RR ≥ u₁temp*u₂temp*R₁ + u₁temp*R₂ + u₂temp*R₅ || continue
373
tempcost = unroll_cost(X, u₁temp, u₂temp, u₁L, u₂L)
374
# @show u₁temp, u₂temp, tempcost
375
+ # @show u₁temp*u₂temp*R₁ + u₁temp*R₂ + u₂temp*R₅
376
if tempcost ≤ bestcost
377
bestcost = tempcost
378
u₁best, u₂best = u₁temp, u₂temp
@@ -383,6 +384,8 @@ end
383
384
385
function solve_unroll(X, R, u₁L, u₂L, u₁step, u₂step)
386
X₁, X₂, X₃, X₄ = X[1], X[2], X[3], X[4]
387
+ # If we don't have AVX512, masks occupy a vector register
388
+ VectorizationBase.AVX512F || (R[3] += 1)
389
R₁, R₂, R₃, R₄, R₅ = R[1], R[2], R[3], R[4], R[5]
390
iszero(R₅) || return solve_unroll_iter(X, R, u₁L, u₂L, u₁step:u₁step:10, u₂step:u₂step:10)
391
RR = REGISTER_COUNT - R₃ - R₄
0 commit comments