Skip to content

Commit 1b07d0b

Browse files
committed
Special case znver1 for reductions, and fix an ifelse non-reduction regression introduced by adding some reduction support.
1 parent 84a0b91 commit 1b07d0b

File tree

2 files changed

+21
-14
lines changed

2 files changed

+21
-14
lines changed

src/add_compute.jl

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -326,22 +326,26 @@ function add_compute_ifelse!(
326326
end
327327
vparents = Operation[cond, iffalse]
328328
setdiffv!(reduceddeps, deps, loopdependencies(iftrue))
329-
add_reduction_update_parent!(
330-
vparents, deps, reduceddeps, ls,
331-
iftrue, Instruction(:LoopVectorization,:ifelse), 2, elementbytes
332-
)
329+
if any(in(deps), reduceddeps)
330+
return add_reduction_update_parent!(
331+
vparents, deps, reduceddeps, ls,
332+
iftrue, Instruction(:LoopVectorization,:ifelse), 2, elementbytes
333+
)
334+
end
333335
elseif name(iffalse) === LHS
334336
vparents = Operation[cond, iftrue]
335337
setdiffv!(reduceddeps, deps, loopdependencies(iffalse))
336-
add_reduction_update_parent!(
337-
vparents, deps, reduceddeps, ls,
338-
iffalse, Instruction(:LoopVectorization,:ifelse), 3, elementbytes
339-
)
340-
else
341-
vparents = Operation[cond, iftrue, iffalse]
342-
op = Operation(length(operations(ls)), LHS, elementbytes, :ifelse, compute, deps, reduceddeps, vparents)
343-
pushop!(ls, op, LHS)
338+
if any(in(deps), reduceddeps)
339+
return add_reduction_update_parent!(
340+
vparents, deps, reduceddeps, ls,
341+
iffalse, Instruction(:LoopVectorization,:ifelse), 3, elementbytes
342+
)
343+
end
344344
end
345+
vparents = Operation[cond, iftrue, iffalse]
346+
op = Operation(length(operations(ls)), LHS, elementbytes, :ifelse, compute, deps, reduceddeps, vparents)
347+
pushop!(ls, op, LHS)
348+
345349
end
346350

347351
# adds x ^ (p::Real)

src/lowering.jl

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -811,8 +811,11 @@ function calc_Ureduct(ls::LoopSet, us::UnrollSpecification)
811811
loopisstatic = isstaticloop(getloop(ls, names(ls)[u₁loopnum]))
812812
loopisstatic &= ((vectorizedloopnum != u₁loopnum) | (!iszero(ls.vector_width[])))
813813
# loopisstatic ? u₁ : min(u₁, 4) # much worse than the other two options, don't use this one
814-
loopisstatic ? u₁ : (u₁ ≥ 4 ? 2 : 1)
815-
# loopisstatic ? u₁ : 1
814+
if Sys.CPU_NAME === "znver1"
815+
loopisstatic ? u₁ : 1
816+
else
817+
loopisstatic ? u₁ : (u₁ ≥ 4 ? 2 : 1)
818+
end
816819
else
817820
8#u₂#u₁
818821
# elseif num_loops(ls) == u₁loopnum

0 commit comments

Comments
 (0)