Skip to content

Commit cf37007

Browse files
committed
Bump SIMDPirates requirement to fix #153.
1 parent 7d94f8b commit cf37007

File tree

6 files changed

+31
-11
lines changed

6 files changed

+31
-11
lines changed

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.8.25"
4+
version = "0.8.26"
55

66
[deps]
77
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
@@ -15,7 +15,7 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
1515
[compat]
1616
DocStringExtensions = "0.8"
1717
OffsetArrays = "1"
18-
SIMDPirates = "0.8.24"
18+
SIMDPirates = "0.8.25"
1919
SLEEFPirates = "0.5.4"
2020
UnPack = "0,1"
2121
VectorizationBase = "0.12.31"

src/graphs.jl

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,15 @@ function LoopSet(mod::Symbol)
346346
)
347347
end
348348

349-
cacheunrolled!(ls::LoopSet, u₁loop, u₂loop, vectorized) = foreach(op -> setunrolled!(op, u₁loop, u₂loop, vectorized), operations(ls))
349+
"""
    cacheunrolled!(ls::LoopSet, u₁loop, u₂loop, vectorized)

Call `setunrolled!(op, u₁loop, u₂loop, vectorized)` on every operation of `ls`, then
rebuild each operation's `children` list from the `parents` links.

The `children` lists are emptied before being refilled, so each call rebuilds them from
scratch rather than appending to whatever was there before.
"""
function cacheunrolled!(ls::LoopSet, u₁loop, u₂loop, vectorized)
    foreach(op -> setunrolled!(op, u₁loop, u₂loop, vectorized), operations(ls))
    # Reset every child list first; the loop below appends to them.
    foreach(empty! ∘ children, operations(ls))
    # `op` is a child of each of its parents.
    for op ∈ operations(ls), opp ∈ parents(op)
        push!(children(opp), op)
    end
    return nothing
end
350358

351359
num_loops(ls::LoopSet) = length(ls.loops)
352360
function oporder(ls::LoopSet)

src/lower_compute.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,8 @@ function lower_compute!(
255255
end
256256
if instr.instr === :identity && isone(length(parents_op))
257257
push!(q.args, Expr(:(=), varsym, instrcall.args[2]))
258+
elseif should_broadcast_op(op)
259+
push!(q.args, Expr(:(=), varsym, Expr(:call, lv(:vbroadcast), VECTORWIDTHSYMBOL, instrcall)))
258260
else
259261
push!(q.args, Expr(:(=), varsym, instrcall))
260262
end

src/lower_constant.jl

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,17 @@
11

2+
"""
    should_broadcast_op(op::Operation)

Return `true` when `op` is not itself vectorized, yet every operation in `children(op)`
is vectorized and none of them accesses memory (`accesses_memory`). Return `false`
otherwise.

Callers use this to decide whether the (scalar) result of `op` should be emitted in
broadcast/vector form.
"""
function should_broadcast_op(op::Operation)
    isvectorized(op) && return false
    for opc ∈ children(op)
        # Inspect the child `opc`, not `op`: `op` is already known to be non-vectorized
        # here, so testing `op` would reject every operation that has any child.
        (!isvectorized(opc) || accesses_memory(opc)) && return false
    end
    return true
end
9+
210
# @inline onefloat(::Type{T}) where {T} = one(sizeequivalentfloat(T))
311
# @inline oneinteger(::Type{T}) where {T} = one(sizeequivalentint(T))
412
@inline zerofloat(::Type{T}) where {T} = zero(sizeequivalentfloat(T))
513
@inline zerointeger(::Type{T}) where {T} = zero(sizeequivalentint(T))
614

7-
815
function lower_zero!(
916
q::Expr, op::Operation, ls::LoopSet, ua::UnrollArgs, zerotyp::NumberType = zerotype(ls, op)
1017
)
@@ -21,7 +28,8 @@ function lower_zero!(
2128
pushpreamble!(ls, Expr(:(=), newtypeT, Expr(:call, lv(:sizeequivalentfloat), ELTYPESYMBOL)))
2229
typeT = newtypeT
2330
end
24-
if vectorized ∈ loopdependencies(op) || vectorized ∈ reducedchildren(op) || vectorized ∈ reduceddependencies(op)
31+
# TODO: make should_broadcast_op handle everything.
32+
if isvectorized(op) || vectorized ∈ reducedchildren(op) || vectorized ∈ reduceddependencies(op) || should_broadcast_op(op)
2533
call = Expr(:call, lv(:vzero), VECTORWIDTHSYMBOL, typeT)
2634
else
2735
call = Expr(:call, :zero, typeT)
@@ -57,7 +65,7 @@ function lower_constant!(
5765
instruction = op.instruction
5866
constsym = instruction.instr
5967
reducedchildvectorized = vectorized ∈ reducedchildren(op)
60-
if reducedchildvectorized || vectorized ∈ loopdependencies(op) || vectorized ∈ reduceddependencies(op)
68+
if reducedchildvectorized || isvectorized(op) || vectorized ∈ reduceddependencies(op) || should_broadcast_op(op)
6169
# call = Expr(:call, lv(:vbroadcast), W, Expr(:call, lv(:maybeconvert), typeT, constsym))
6270
call = if reducedchildvectorized && vectorized ∈ loopdependencies(op)
6371
instrclass = getparentsreductzero(ls, op)

src/lowering.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -643,7 +643,7 @@ function define_eltype_vec_width!(q::Expr, ls::LoopSet, vectorized)
643643
push!(q.args, Expr(:(=), ELTYPESYMBOL, determine_eltype(ls)))
644644
push!(q.args, Expr(:(=), VECTORWIDTHSYMBOL, determine_width(ls, vectorized)))
645645
end
646-
function setup_preamble!(ls::LoopSet, us::UnrollSpecification)
646+
function setup_preamble!(ls::LoopSet, us::UnrollSpecification, Ureduct::Int)
647647
@unpack u₁loopnum, u₂loopnum, vectorizedloopnum, u₁, u₂ = us
648648
order = names(ls)
649649
u₁loopsym = order[u₁loopnum]
@@ -653,6 +653,7 @@ function setup_preamble!(ls::LoopSet, us::UnrollSpecification)
653653
iszero(length(ls.includedactualarrays)) || define_eltype_vec_width!(ls.preamble, ls, vectorized)
654654
lower_licm_constants!(ls)
655655
isone(num_loops(ls)) || pushpreamble!(ls, definemask(getloop(ls, vectorized)))#, u₁ > 1 && u₁loopnum == vectorizedloopnum))
656+
initialize_outer_reductions!(ls, 0, Ureduct, vectorized)
656657
for op ∈ operations(ls)
657658
(iszero(length(loopdependencies(op))) && iscompute(op)) && lower_compute!(ls.preamble, op, ls, UnrollArgs(u₁, u₁loopsym, u₂loopsym, vectorized, u₂, nothing), nothing)
658659
end
@@ -684,9 +685,8 @@ function lower_unrollspec(ls::LoopSet)
684685
# @show u₁, u₂
685686
order = names(ls)
686687
vectorized = order[vectorizedloopnum]
687-
setup_preamble!(ls, us)
688688
Ureduct = calc_Ureduct(ls, us)
689-
initialize_outer_reductions!(ls, 0, Ureduct, vectorized)
689+
setup_preamble!(ls, us, Ureduct)
690690
initgesps = add_loop_start_stop_manager!(ls)
691691
q = Expr(:let, initgesps, lower_unrolled_dynamic(ls, us, num_loops(ls), false))
692692
q = gc_preserve( ls, Expr(:block, q) )

src/operations.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,8 @@ mutable struct Operation <: AbstractLoopOperation
155155
reduced_deps::Vector{Symbol}
156156
"Operations whose result this operation depends on"
157157
parents::Vector{Operation}
158+
"Operations that depend on this operation's result"
159+
children::Vector{Operation}
158160
"For `memload` or `memstore`, encodes the array location"
159161
ref::ArrayReferenceMeta
160162
"`gensymmed` name of result."
@@ -184,7 +186,7 @@ mutable struct Operation <: AbstractLoopOperation
184186
identifier, variable, elementbytes, instruction, node_type,
185187
convert(Vector{Symbol},dependencies),
186188
convert(Vector{Symbol},reduced_deps),
187-
convert(Vector{Operation},parents),
189+
convert(Vector{Operation},parents), Operation[],
188190
ref, Symbol("##", variable, :_),
189191
reduced_children
190192
)
@@ -260,7 +262,7 @@ accesses_memory(op::AbstractLoopOperation) = isload(op) | isstore(op)
260262
elsize(op::Operation) = op.elementbytes
261263
dependson(op::Operation, sym::Symbol) = sym ∈ op.dependencies
262264
parents(op::Operation) = op.parents
263-
# children(op::Operation) = op.children
265+
children(op::Operation) = op.children
264266
loopdependencies(op::Operation) = op.dependencies
265267
reduceddependencies(op::Operation) = op.reduced_deps
266268
reducedchildren(op::Operation) = op.reduced_children

0 commit comments

Comments
 (0)