Skip to content

Commit ad11641

Browse files
committed
Make creation of maxpointers for comp slightly more efficient.
1 parent 50e262e commit ad11641

File tree

1 file changed

+72
-22
lines changed

1 file changed

+72
-22
lines changed

src/loopstartstopmanager.jl

Lines changed: 72 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -222,15 +222,15 @@ end
222222
"""
    pointermax(ls, ar, n, sub, isvectorized, loop::Loop)

Forward to the `pointermax` method that takes a stop indicator, using
`looplengthexpr(loop, n)` as that indicator.
"""
function pointermax(ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvectorized::Bool, loop::Loop)::Expr
    return pointermax(ls, ar, n, sub, isvectorized, looplengthexpr(loop, n))::Expr
end
225-
function pointermax(ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvectorized::Bool, stophint::Int)::Expr
225+
function pointermax_index(ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvectorized::Bool, stophint::Int)::Tuple{Expr,Int}
226226
# @unpack u₁loopnum, u₂loopnum, vectorizedloopnum, u₁, u₂ = us
227227
loopsym = names(ls)[n]
228-
index = Expr(:tuple)
228+
index = Expr(:tuple)
229229
found_loop_sym = false
230-
call = Expr(:call, lv(:pointerforcomparison))
231-
for i ∈ getindicesonly(ar)
230+
ind = 0
231+
for (j,i) ∈ enumerate(getindicesonly(ar))
232232
if i === loopsym
233-
found_loop_sym = true
233+
ind = j
234234
if iszero(sub)
235235
push!(index.args, stophint)
236236
elseif isvectorized
@@ -242,25 +242,20 @@ function pointermax(ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvec
242242
else
243243
push!(index.args, staticexpr(stophint - sub))
244244
end
245-
push!(call.args, vptr(ar))
246-
# return
247245
else
248246
push!(index.args, Expr(:call, lv(:Zero)))
249247
end
250248
end
251-
@assert found_loop_sym "Failed to find $loopsym"
252-
push!(call.args, index)
253-
call
254-
# @show ar, loopsym
249+
@assert ind != 0 "Failed to find $loopsym"
250+
index, ind
255251
end
256-
function pointermax(ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvectorized::Bool, stopsym)::Expr
257-
# @unpack u₁loopnum, u₂loopnum, vectorizedloopnum, u₁, u₂ = us
252+
function pointermax_index(ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvectorized::Bool, stopsym)::Tuple{Expr,Int}
258253
loopsym = names(ls)[n]
259-
index = Expr(:tuple)
260-
found_loop_sym = false
261-
for i ∈ getindicesonly(ar)
254+
index = Expr(:tuple);
255+
ind = 0
256+
for (j,i) ∈ enumerate(getindicesonly(ar))
262257
if i === loopsym
263-
found_loop_sym = true
258+
ind = j
264259
if iszero(sub)
265260
push!(index.args, stopsym)
266261
elseif isvectorized
@@ -272,19 +267,76 @@ function pointermax(ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvec
272267
else
273268
push!(index.args, Expr(:call, lv(:vsub), stopsym, sub))
274269
end
275-
# return
276270
else
277271
push!(index.args, Expr(:call, lv(:Zero)))
278272
end
279273
end
280-
@assert found_loop_sym "Failed to find $loopsym"
274+
@assert ind != 0 "Failed to find $loopsym"
275+
index, ind
276+
end
277+
"""
    pointermax(ls, ar, n, sub, isvectorized, stopsym)

Build the call expression `pointerforcomparison(vptr(ar), index)`, where `index`
is the first element of the tuple returned by `pointermax_index` for the same
arguments.
"""
function pointermax(ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvectorized::Bool, stopsym)::Expr
    # Only the index tuple is needed here; the position of the loop index is discarded.
    index, _ = pointermax_index(ls, ar, n, sub, isvectorized, stopsym)
    return Expr(:call, lv(:pointerforcomparison), vptr(ar), index)
end
284282

285283
"""
    defpointermax(ls, ar, n, sub, isvectorized)

Return the assignment expression whose left-hand side is `maxsym(vptr(ar), sub)`
and whose right-hand side is `pointermax(ls, ar, n, sub, isvectorized)`.
"""
function defpointermax(ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvectorized::Bool)::Expr
    lhs = maxsym(vptr(ar), sub)
    rhs = pointermax(ls, ar, n, sub, isvectorized)
    return Expr(:(=), lhs, rhs)
end
286+
"""
    offsetindex(dim::Int, ind::Int, scale::Int, isvectorized::Bool)

Build a `:tuple` expression with `dim` entries. Every entry is a call to
`lv(:Zero)` except the one at position `ind`, which (when `scale` is nonzero) is:

- `staticexpr(scale)` if `isvectorized` is `false`;
- `VECTORWIDTHSYMBOL` if `isvectorized` is `true` and `scale == 1`;
- a `vmul` call of `VECTORWIDTHSYMBOL` and `staticexpr(scale)` otherwise.

If `scale` is zero, position `ind` also receives the `Zero` call.
"""
function offsetindex(dim::Int, ind::Int, scale::Int, isvectorized::Bool)
    index = Expr(:tuple)
    for d ∈ 1:dim
        if d != ind || iszero(scale)
            # No offset along this dimension.
            push!(index.args, Expr(:call, lv(:Zero)))
        elseif !isvectorized
            push!(index.args, staticexpr(scale))
        elseif isone(scale)
            # A scale of one needs no multiplication.
            push!(index.args, VECTORWIDTHSYMBOL)
        else
            push!(index.args, Expr(:call, lv(:vmul), VECTORWIDTHSYMBOL, staticexpr(scale)))
        end
    end
    return index
end
305+
"""
    append_pointer_maxes!(loopstart, ls, ar, n, submax, isvectorized, stopindicator)

Push onto `loopstart.args` one assignment to `maxsym(vptr(ar), sub)` for each
`sub ∈ 0:submax`.

When `submax < 2`, each right-hand side is an independent
`pointermax(ls, ar, n, sub, isvectorized, stopindicator)` expression.

Otherwise a single base pointer is computed via `gesp` at the index returned by
`pointermax_index` for `sub = submax` and bound to a `gensym`ed name. The entry for
`submax` is `pointerforcomparison` of that base, and each entry for a smaller `sub`
is `pointerforcomparison` of the base with an `offsetindex` of scale `submax - sub`.

Returns `nothing`.
"""
function append_pointer_maxes!(loopstart::Expr, ls::LoopSet, ar::ArrayReferenceMeta, n::Int, submax::Int, isvectorized::Bool, stopindicator)
    if submax < 2
        for sub ∈ 0:submax
            push!(loopstart.args, Expr(:(=), maxsym(vptr(ar), sub), pointermax(ls, ar, n, sub, isvectorized, stopindicator)))
        end
        return nothing
    end
    index, ind = pointermax_index(ls, ar, n, submax, isvectorized, stopindicator)
    vptr_ar = vptr(ar)
    _pointercompbase = maxsym(vptr_ar, submax)
    # Fresh name for the shared base pointer that the remaining maxes are offset from.
    pointercompbase = gensym(_pointercompbase)
    push!(loopstart.args, Expr(:(=), pointercompbase, Expr(:call, lv(:gesp), vptr_ar, index)))
    push!(loopstart.args, Expr(:(=), _pointercompbase, Expr(:call, lv(:pointerforcomparison), pointercompbase)))
    dim = length(getindicesonly(ar))
    for sub ∈ 0:submax-1
        offset = offsetindex(dim, ind, submax - sub, isvectorized)
        push!(loopstart.args, Expr(:(=), maxsym(vptr_ar, sub), Expr(:call, lv(:pointerforcomparison), pointercompbase, offset)))
    end
    return nothing
end
337+
"""
    append_pointer_maxes!(loopstart, ls, ar, n, submax, isvectorized)

Forward to the seven-argument `append_pointer_maxes!` method, using
`looplengthexpr(getloop(ls, names(ls)[n]), n)` as the stop indicator.
"""
function append_pointer_maxes!(loopstart::Expr, ls::LoopSet, ar::ArrayReferenceMeta, n::Int, submax::Int, isvectorized::Bool)
    stopindicator = looplengthexpr(getloop(ls, names(ls)[n]), n)
    return append_pointer_maxes!(loopstart, ls, ar, n, submax, isvectorized, stopindicator)
end
288340

289341
function maxunroll(us::UnrollSpecification, n)
290342
@unpack u₁loopnum, u₂loopnum, u₁, u₂ = us
@@ -314,9 +366,7 @@ function startloop(ls::LoopSet, us::UnrollSpecification, n::Int, submax = maxunr
314366
push!(loopstart.args, startloop(getloop(ls, loopsym), loopsym))
315367
else
316368
isvectorized = n == vectorizedloopnum
317-
for sub ∈ 0:submax
318-
push!(loopstart.args, defpointermax(ls, ptrdefs[termind], n, sub, isvectorized))
319-
end
369+
append_pointer_maxes!(loopstart, ls, ptrdefs[termind], n, submax, isvectorized)
320370
end
321371
loopstart
322372
end

0 commit comments

Comments
 (0)