Skip to content

Commit 5ad30d9

Browse files
committed
Minor probable performance improvement for tiled reductions.
1 parent 2c87fce commit 5ad30d9

File tree

2 files changed

+4
-2
lines changed

2 files changed

+4
-2
lines changed

Project.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ SLEEFPirates = "476501e8-09a2-5ece-8869-fb82de89a1fa"
1212
VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
1313

1414
[compat]
15+
OffsetArrays = "1"
1516
Parameters = "0"
1617
SIMDPirates = "~0.6.6"
1718
SLEEFPirates = "~0.4"

src/lower_compute.jl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ function lower_compute!(
5757
# parentsyms = [opp.variable for opp ∈ parents(op)]
5858
Uiter = opunrolled ? U - 1 : 0
5959
isreduct = isreduction(op)
60-
if !isnothing(suffix) && isreduct
60+
if !isnothing(suffix) && isreduct && tiledouterreduction == -1
6161
instrfid = findfirst(isequal(instr.instr), (:vfmadd_fast, :vfnmadd_fast, :vfmsub_fast, :vfnmsub_fast))
6262
if instrfid !== nothing
6363
instr = Instruction((:vfmadd231, :vfnmadd231, :vfmsub231, :vfnmsub231)[instrfid])
@@ -75,7 +75,8 @@ function lower_compute!(
7575
for u ∈ 0:Uiter
7676
instrcall = Expr(instr) # Expr(:call, instr)
7777
varsym = if tiledouterreduction > 0 # then suffix !== nothing
78-
modsuffix = ((u + suffix*U) & 3)
78+
# modsuffix = ((u + suffix*U) & 3)
79+
modsuffix = (suffix & 3)
7980
Symbol(mvar, modsuffix)
8081
elseif opunrolled
8182
Symbol(mvar, u)

0 commit comments

Comments
 (0)