@@ -222,15 +222,15 @@ end
222
222
"""
    pointermax(ls, ar, n, sub, isvectorized, loop::Loop) -> Expr

Convenience method: derive the stop value from `loop` via `looplengthexpr(loop, n)`
and forward to the `pointermax` method that takes the stop value as its last argument.
"""
function pointermax(
    ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvectorized::Bool, loop::Loop
)::Expr
    stop = looplengthexpr(loop, n)
    # The assertion is kept so a non-`Expr` result fails here, exactly as before.
    return pointermax(ls, ar, n, sub, isvectorized, stop)::Expr
end
225
- function pointermax (ls:: LoopSet , ar:: ArrayReferenceMeta , n:: Int , sub:: Int , isvectorized:: Bool , stophint:: Int ):: Expr
225
+ function pointermax_index (ls:: LoopSet , ar:: ArrayReferenceMeta , n:: Int , sub:: Int , isvectorized:: Bool , stophint:: Int ):: Tuple{ Expr,Int}
226
226
# @unpack u₁loopnum, u₂loopnum, vectorizedloopnum, u₁, u₂ = us
227
227
loopsym = names (ls)[n]
228
- index = Expr (:tuple )
228
+ index = Expr (:tuple )
229
229
found_loop_sym = false
230
- call = Expr ( :call , lv ( :pointerforcomparison ))
231
- for i ∈ getindicesonly (ar)
230
+ ind = 0
231
+ for (j,i) ∈ enumerate ( getindicesonly (ar) )
232
232
if i === loopsym
233
- found_loop_sym = true
233
+ ind = j
234
234
if iszero (sub)
235
235
push! (index. args, stophint)
236
236
elseif isvectorized
@@ -242,25 +242,20 @@ function pointermax(ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvec
242
242
else
243
243
push! (index. args, staticexpr (stophint - sub))
244
244
end
245
- push! (call. args, vptr (ar))
246
- # return
247
245
else
248
246
push! (index. args, Expr (:call , lv (:Zero )))
249
247
end
250
248
end
251
- @assert found_loop_sym " Failed to find $loopsym "
252
- push! (call. args, index)
253
- call
254
- # @show ar, loopsym
249
+ @assert ind != 0 " Failed to find $loopsym "
250
+ index, ind
255
251
end
256
- function pointermax (ls:: LoopSet , ar:: ArrayReferenceMeta , n:: Int , sub:: Int , isvectorized:: Bool , stopsym):: Expr
257
- # @unpack u₁loopnum, u₂loopnum, vectorizedloopnum, u₁, u₂ = us
252
+ function pointermax_index (ls:: LoopSet , ar:: ArrayReferenceMeta , n:: Int , sub:: Int , isvectorized:: Bool , stopsym):: Tuple{Expr,Int}
258
253
loopsym = names (ls)[n]
259
- index = Expr (:tuple )
260
- found_loop_sym = false
261
- for i ∈ getindicesonly (ar)
254
+ index = Expr (:tuple );
255
+ ind = 0
256
+ for (j,i) ∈ enumerate ( getindicesonly (ar) )
262
257
if i === loopsym
263
- found_loop_sym = true
258
+ ind = j
264
259
if iszero (sub)
265
260
push! (index. args, stopsym)
266
261
elseif isvectorized
@@ -272,19 +267,76 @@ function pointermax(ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvec
272
267
else
273
268
push! (index. args, Expr (:call , lv (:vsub ), stopsym, sub))
274
269
end
275
- # return
276
270
else
277
271
push! (index. args, Expr (:call , lv (:Zero )))
278
272
end
279
273
end
280
- @assert found_loop_sym " Failed to find $loopsym "
274
+ @assert ind != 0 " Failed to find $loopsym "
275
+ index, ind
276
+ end
277
"""
    pointermax(ls, ar, n, sub, isvectorized, stopsym) -> Expr

Build the call expression `pointerforcomparison(vptr(ar), index)` (the callee name is
passed through `lv`), where `index` is the tuple expression produced by
`pointermax_index` for the same arguments. The position also returned by
`pointermax_index` is not needed here and is discarded.
"""
function pointermax(
    ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvectorized::Bool, stopsym
)::Expr
    index, _ = pointermax_index(ls, ar, n, sub, isvectorized, stopsym)
    return Expr(:call, lv(:pointerforcomparison), vptr(ar), index)
end
284
282
285
283
"""
    defpointermax(ls, ar, n, sub, isvectorized) -> Expr

Return the assignment expression `maxsym(vptr(ar), sub) = pointermax(...)`, i.e. the
definition of the comparison bound named by `maxsym` for unroll offset `sub`.
"""
function defpointermax(
    ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvectorized::Bool
)::Expr
    # Left-hand side is computed first, matching the original evaluation order.
    target = maxsym(vptr(ar), sub)
    bound = pointermax(ls, ar, n, sub, isvectorized)
    return Expr(:(=), target, bound)
end
286
"""
    offsetindex(dim, ind, scale, isvectorized) -> Expr

Build a `:tuple` expression with `dim` entries describing an offset along a single
position.

Every entry is the call `lv(:Zero)()` except entry `ind`, which (when `scale` is
nonzero) holds:

- `staticexpr(scale)` if `isvectorized` is `false`;
- `VECTORWIDTHSYMBOL` if `isvectorized` is `true` and `scale == 1`;
- the call `lv(:vmul)(VECTORWIDTHSYMBOL, staticexpr(scale))` otherwise.

If `scale` is zero, or `ind` is outside `1:dim`, all entries are the zero call.
"""
function offsetindex(dim::Int, ind::Int, scale::Int, isvectorized::Bool)
    index = Expr(:tuple)
    for d in 1:dim
        # Each entry is built inside the loop so the helpers are invoked exactly as
        # often, and in the same order, as before.
        entry = if d != ind || iszero(scale)
            Expr(:call, lv(:Zero))
        elseif !isvectorized
            staticexpr(scale)
        elseif isone(scale)
            VECTORWIDTHSYMBOL
        else
            Expr(:call, lv(:vmul), VECTORWIDTHSYMBOL, staticexpr(scale))
        end
        push!(index.args, entry)
    end
    return index
end
305
"""
    append_pointer_maxes!(loopstart, ls, ar, n, submax, isvectorized, stopindicator)

Push onto `loopstart.args` one assignment per `sub ∈ 0:submax`, each binding
`maxsym(vptr(ar), sub)` to a pointer-comparison expression. Returns `nothing`.

- `submax < 2`: every right-hand side is built independently with
  `pointermax(ls, ar, n, sub, isvectorized, stopindicator)`.
- otherwise: a single base pointer is emitted first (a `gesp` call on `vptr(ar)` with
  the index from `pointermax_index` at `sub = submax`, bound to a fresh `gensym`), the
  bound for `submax` is `pointerforcomparison` of that base, and the bounds for
  `0:submax-1` are `pointerforcomparison` of the same base with an
  `offsetindex(dim, ind, submax - sub, isvectorized)` offset.
"""
function append_pointer_maxes!(
    loopstart::Expr,
    ls::LoopSet,
    ar::ArrayReferenceMeta,
    n::Int,
    submax::Int,
    isvectorized::Bool,
    stopindicator,
)
    stmts = loopstart.args

    # Few bounds: compute each one directly.
    if submax < 2
        for sub in 0:submax
            target = maxsym(vptr(ar), sub)
            bound = pointermax(ls, ar, n, sub, isvectorized, stopindicator)
            push!(stmts, Expr(:(=), target, bound))
        end
        return nothing
    end

    # Many bounds: compute the one for `submax` once and express the rest as offsets
    # from it. (An alternative considered upstream chained successive `gesp` calls,
    # one per bound, instead of precomputing every offset from the single base.)
    stopindex, loopdim = pointermax_index(ls, ar, n, submax, isvectorized, stopindicator)
    ptr = vptr(ar)
    basemax = maxsym(ptr, submax)
    base = gensym(basemax)
    push!(stmts, Expr(:(=), base, Expr(:call, lv(:gesp), ptr, stopindex)))
    push!(stmts, Expr(:(=), basemax, Expr(:call, lv(:pointerforcomparison), base)))

    dim = length(getindicesonly(ar))
    for sub in 0:(submax - 1)
        target = maxsym(ptr, sub)
        bound = Expr(
            :call,
            lv(:pointerforcomparison),
            base,
            offsetindex(dim, loopdim, submax - sub, isvectorized),
        )
        push!(stmts, Expr(:(=), target, bound))
    end
    return nothing
end
337
"""
    append_pointer_maxes!(loopstart, ls, ar, n, submax, isvectorized)

Convenience method: look up the loop named `names(ls)[n]` with `getloop`, take
`looplengthexpr(loop, n)` as the stop indicator, and forward to the seven-argument
method.
"""
function append_pointer_maxes!(
    loopstart::Expr,
    ls::LoopSet,
    ar::ArrayReferenceMeta,
    n::Int,
    submax::Int,
    isvectorized::Bool,
)
    loopsym = names(ls)[n]
    loop = getloop(ls, loopsym)
    stopindicator = looplengthexpr(loop, n)
    return append_pointer_maxes!(loopstart, ls, ar, n, submax, isvectorized, stopindicator)
end
288
340
289
341
function maxunroll (us:: UnrollSpecification , n)
290
342
@unpack u₁loopnum, u₂loopnum, u₁, u₂ = us
@@ -314,9 +366,7 @@ function startloop(ls::LoopSet, us::UnrollSpecification, n::Int, submax = maxunr
314
366
push! (loopstart. args, startloop (getloop (ls, loopsym), loopsym))
315
367
else
316
368
isvectorized = n == vectorizedloopnum
317
- for sub ∈ 0 : submax
318
- push! (loopstart. args, defpointermax (ls, ptrdefs[termind], n, sub, isvectorized))
319
- end
369
+ append_pointer_maxes! (loopstart, ls, ptrdefs[termind], n, submax, isvectorized)
320
370
end
321
371
loopstart
322
372
end
0 commit comments