Skip to content

Commit 3bd2fdf

Browse files
committed
WIP: attempt to get operations working properly with CartesianIndices
1 parent 5063d0e commit 3bd2fdf

File tree

2 files changed

+53
-30
lines changed

2 files changed

+53
-30
lines changed

src/graphs.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ struct LoopSet
178178
loops::Vector{Loop}
179179
opdict::Dict{Symbol,Operation}
180180
operations::Vector{Operation} # Split them to make it easier to iterate over just a subset
181+
operation_offsets::Vector{Int}
181182
outer_reductions::Vector{Int} # IDs of reduction operations that need to be reduced at end.
182183
loop_order::LoopOrder
183184
preamble::Expr
@@ -284,7 +285,7 @@ function LoopSet(mod::Symbol)# = :LoopVectorization)
284285
LoopSet(
285286
Symbol[], [0], Loop[],
286287
Dict{Symbol,Operation}(),
287-
Operation[],
288+
Operation[], [0],
288289
Int[],
289290
LoopOrder(),
290291
Expr(:block),

src/reconstruct_loopset.jl

Lines changed: 51 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -161,19 +161,19 @@ function process_metadata!(ls::LoopSet, AM, num_arrays::Int)::Vector{Symbol}
161161
append!(ls.preamble_ones, AM[7].parameters)
162162
arraysymbolinds
163163
end
164-
function parents_symvec(ls::LoopSet, u::Unsigned)
165-
loops = Symbol[]
166-
offsets = ls.loopsymbol_offsets
164+
function loopindex(ls::LoopSet, u::Unsigned, shift::Unsigned)
165+
idxs = Int[]
167166
while u != zero(u)
168-
idx = ( u % UInt8 ) & 0x0f
169-
for j = offsets[idx]+1:offsets[idx+1]
170-
push!(loops, getloopsym(ls, j))
171-
end
172-
u >>= 4
167+
push!(idxs, ( u % UInt8 ) & 0x0f)
168+
u >>= shift
173169
end
174-
return reverse!(loops)
170+
return reverse!(idxs)
175171
end
176-
loopdependencies(ls::LoopSet, os::OperationStruct) = parents_symvec(ls, os.loopdeps)
172+
function parents_symvec(ls::LoopSet, u::Unsigned, offset=0)
173+
idxs = loopindex(ls, u, 0x04) # FIXME DRY (undesirable that this gets hard-coded in multiple places)
174+
return Symbol[getloopsym(ls, i + offset) for i in idxs]
175+
end
176+
loopdependencies(ls::LoopSet, os::OperationStruct, offset=0) = parents_symvec(ls, os.loopdeps, offset)
177177
reduceddependencies(ls::LoopSet, os::OperationStruct) = parents_symvec(ls, os.reduceddeps)
178178
childdependencies(ls::LoopSet, os::OperationStruct) = parents_symvec(ls, os.childdeps)
179179

@@ -183,30 +183,47 @@ function add_op!(
183183
ls::LoopSet, instr::Instruction, os::OperationStruct, mrefs::Vector{ArrayReferenceMeta}, opsymbol, elementbytes::Int
184184
)
185185
# opsymbol = (isconstant(os) && instr != LOOPCONSTANT) ? instr.instr : opsymbol
186-
op = Operation(
187-
length(operations(ls)), opsymbol, elementbytes, instr,
188-
optype(os), loopdependencies(ls, os), reduceddependencies(ls, os),
189-
Operation[], (isload(os) | isstore(os)) ? mrefs[os.array] : NOTAREFERENCE,
190-
childdependencies(ls, os)
191-
)
192-
push!(ls.operations, op)
193-
op
186+
# If it's a CartesianIndex add or subtract, we may have to add multiple operations
187+
offsets = ls.loopsymbol_offsets
188+
idxs = loopindex(ls, os.loopdeps, 0x04) # FIXME DRY
189+
Δidxs = map(i->offsets[i+1]-offsets[i], idxs)
190+
nops = first(Δidxs)
191+
@assert all(isequal(nops), Δidxs)
192+
ops = Vector{Operation}(undef, nops)
193+
for offset = 0:nops-1
194+
sym = nops == 1 ? opsymbol : Symbol(String(opsymbol)*'#'*string(offset+1)*'#')
195+
ops[offset+1] = op = Operation(
196+
length(operations(ls)), sym, elementbytes, instr,
197+
optype(os), loopdependencies(ls, os, offset), reduceddependencies(ls, os),
198+
Operation[], (isload(os) | isstore(os)) ? mrefs[os.array] : NOTAREFERENCE,
199+
childdependencies(ls, os)
200+
)
201+
# @show op
202+
push!(ls.operations, op)
203+
end
204+
push!(ls.operation_offsets, ls.operation_offsets[end]+nops)
205+
ops
194206
end
195-
function add_parents_to_op!(ls::LoopSet, parents::Vector{Operation}, up::Unsigned)
207+
function add_parents_to_op!(ls::LoopSet, parents::Vector{Operation}, up::Unsigned, k::Int)
196208
ops = operations(ls)
197-
while up != zero(up)
198-
pushfirst!(parents, ops[ up % UInt8 ])
199-
up >>>= 8
209+
offsets = ls.operation_offsets
210+
for i ∈ loopindex(ls, up, 0x08) # FIXME DRY
211+
pushfirst!(parents, ops[offsets[i]+k])
200212
end
201213
end
202214
function add_parents_to_ops!(ls::LoopSet, ops::Vector{OperationStruct}, constoffset)
203-
for (i,op) ∈ enumerate(operations(ls))
204-
add_parents_to_op!(ls, parents(op), ops[i].parents)
205-
if isconstant(op)
206-
instr = instruction(op)
207-
if instr != LOOPCONSTANT && instr.mod !== :numericconstant
208-
constoffset += 1
209-
pushpreamble!(ls, Expr(:(=), instr.instr, Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__, Symbol(@__FILE__)), Expr(:ref, :vargs, constoffset))))
215+
offsets = ls.operation_offsets
216+
for i = 1:length(offsets)-1
217+
pos = offsets[i]
218+
for k = 1:offsets[i+1]-pos
219+
op = ls.operations[pos+k]
220+
add_parents_to_op!(ls, parents(op), ops[i].parents, k)
221+
if isconstant(op)
222+
instr = instruction(op)
223+
if instr != LOOPCONSTANT && instr.mod !== :numericconstant
224+
constoffset += 1
225+
pushpreamble!(ls, Expr(:(=), instr.instr, Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__, Symbol(@__FILE__)), Expr(:ref, :vargs, constoffset))))
226+
end
210227
end
211228
end
212229
end
@@ -215,12 +232,16 @@ end
215232
function add_ops!(
216233
ls::LoopSet, instr::Vector{Instruction}, ops::Vector{OperationStruct}, mrefs::Vector{ArrayReferenceMeta}, opsymbols::Vector{Symbol}, constoffset::Int, elementbytes::Int
217234
)
235+
# @show ls.loopsymbols ls.loopsymbol_offsets
218236
for i ∈ eachindex(ops)
219237
os = ops[i]
220238
opsymbol = opsymbols[os.symid]
221239
add_op!(ls, instr[i], os, mrefs, opsymbol, elementbytes)
222240
end
223241
add_parents_to_ops!(ls, ops, constoffset)
242+
for op in operations(ls)
243+
@show op
244+
end
224245
end
225246

226247
# elbytes(::VectorizationBase.AbstractPointer{T}) where {T} = sizeof(T)::Int
@@ -272,6 +293,7 @@ function avx_body(ls, UT)
272293
U, T = UT
273294
q = iszero(U) ? lower(ls) : lower(ls, U, T)
274295
length(ls.outer_reductions) == 0 ? push!(q.args, nothing) : push!(q.args, loopset_return_value(ls, Val(true)))
296+
@show q
275297
q
276298
end
277299

0 commit comments

Comments
 (0)