@@ -230,22 +230,27 @@ function normalize_offsets!(
230
230
end
231
231
return Int (minoffset)
232
232
end
233
"""
    isloopvalue(ls::LoopSet, ind::Symbol, isrooted=nothing) -> Bool

Return `true` if any compute operation in `ls` has a parent that is a loop
value whose instruction's `instr` field is `ind`; otherwise return `false`.

When `isrooted` is a `Vector{Bool}`, it is indexed by each operation's position
in `operations(ls)`, and operations whose entry is `false` are skipped. Passing
`nothing` (the default) considers every operation.
"""
function isloopvalue(
    ls::LoopSet, ind::Symbol, isrooted::Union{Nothing,Vector{Bool}} = nothing
)
    for (opid, child) ∈ enumerate(operations(ls))
        # Apply the optional mask first, then restrict to compute operations.
        (isrooted === nothing || isrooted[opid]) || continue
        iscompute(child) || continue
        # A matching parent confirms that `ind` still has children.
        for parent ∈ parents(child)
            isloopvalue(parent) || continue
            instruction(parent).instr === ind && return true
        end
    end
    return false
end
242
248
function cse_constant_offsets! (
243
- ls:: LoopSet , allarrayrefs:: Vector{ArrayReferenceMeta} , allarrayrefsind:: Int , name_to_array_map:: Vector{Vector{Int}} ,
244
- arrayref_to_name_op_collection:: Vector{Vector{Tuple{Int,Int,Int}}} , shouldindbyind:: Vector{Bool}
249
+ ls:: LoopSet , allarrayrefs:: Vector{ArrayReferenceMeta} , allarrayrefsind:: Int , name_to_array_map:: Vector{Vector{Int}} , arrayref_to_name_op_collection:: Vector{Vector{Tuple{Int,Int,Int}}}
245
250
)
246
251
ar = allarrayrefs[allarrayrefsind]
247
252
# @show ar
248
- vptrar = vptr (ar)
253
+ # vptrar = vptr(ar)
249
254
arrayref_to_name_op = arrayref_to_name_op_collection[allarrayrefsind]
250
255
array_refs_with_same_name = name_to_array_map[first (first (arrayref_to_name_op))]
251
256
us = ls. unrollspecification
@@ -254,7 +259,7 @@ function cse_constant_offsets!(
254
259
strides = getstrides (ar)
255
260
offset = first (indices) === DISCONTIGUOUS
256
261
# gespindoffsets = fill(Symbol(""), length(li))
257
- gespinds = Expr ( :tuple )
262
+ gespindsummary = Vector {Tuple{Symbol,Int}} (undef, length (li) )
258
263
for i ∈ eachindex (li)
259
264
gespsymbol:: Symbol = Symbol (" " )
260
265
ii = i + offset
@@ -372,9 +377,10 @@ function cse_constant_offsets!(
372
377
end
373
378
end
374
379
constoffset = normalize_offsets! (ls, i, allarrayrefs, array_refs_with_same_name, arrayref_to_name_op_collection)
375
- pushgespind! (gespinds, ls, gespsymbol, constoffset, ind, li, i, check_shouldindbyind (ls, ind, shouldindbyind), true )
380
+ gespindsummary[i] = (gespsymbol, constoffset)
381
+ # pushgespind!(gespinds, ls, gespsymbol, constoffset, ind, li, i, check_shouldindbyind(ls, ind, shouldindbyind), true)
376
382
end
377
- return gespinds
383
+ return gespindsummary
378
384
end
379
385
@inline similardims (_, i) = i
380
386
@inline similardims (:: CartesianIndices{N} , i) where {N} = VectorizationBase. CartesianVIndex (ntuple (_ -> i, Val {N} ()))
@@ -393,10 +399,22 @@ end
393
399
# end
394
400
# return nothing
395
401
# end
402
"""
    calcgespinds(ls, ar, gespindsummary, shouldindbyind) -> Expr

Build and return a `:tuple` expression by calling `pushgespind!` once per entry
of `ar.loopedindex`.

For position `i`, the index symbol is taken from `getindicesonly(ar)[i]`, and the
`(gespsymbol, constoffset)` pair from `gespindsummary[i]`. The index-by-index flag
is computed with `check_shouldindbyind(ls, ind, shouldindbyind)`, and the final
`fromgsp` argument is always `true`.
"""
function calcgespinds(
    ls::LoopSet,
    ar::ArrayReferenceMeta,
    gespindsummary::Vector{Tuple{Symbol,Int}},
    shouldindbyind::Vector{Bool},
)
    tup = Expr(:tuple)
    looped = ar.loopedindex
    inds = getindicesonly(ar)
    for (dim, isli) ∈ enumerate(looped)
        ind = inds[dim]
        sym, off = gespindsummary[dim]
        byind = check_shouldindbyind(ls, ind, shouldindbyind)
        pushgespind!(tup, ls, sym, off, ind, isli, byind, true)
    end
    return tup
end
413
+
396
414
function pushgespind! (
397
- gespinds:: Expr , ls:: LoopSet , gespsymbol:: Symbol , constoffset:: Int , ind:: Symbol , li :: Vector{ Bool} , i :: Int , index_by_index:: Bool , fromgsp:: Bool
415
+ gespinds:: Expr , ls:: LoopSet , gespsymbol:: Symbol , constoffset:: Int , ind:: Symbol , isli :: Bool , index_by_index:: Bool , fromgsp:: Bool
398
416
)
399
- if li[i]
417
+ if isli
400
418
if ind === CONSTANTZEROINDEX
401
419
if gespsymbol === Symbol (" " )
402
420
push! (gespinds. args, staticexpr (constoffset))
@@ -448,13 +466,21 @@ function pushgespind!(
448
466
elseif fromgsp # from gsp means that a loop could be a CartesianIndices, so we may need to expand
449
467
# TODO : broadcast dimensions in case of cartesian indices
450
468
rangesym = ind
469
+ foundind = false
451
470
for op ∈ operations (ls)
452
471
if name (op) === ind
453
- loopsym = first (loopdependencies (op))
454
- rangesym = getloop (ls, loopsym). rangesym
472
+ loopdeps = loopdependencies (op)
473
+ foundind = true
474
+ if length (loopdeps) ≠ 0
475
+ rangesym = getloop (ls, first (loopdeps)). rangesym
476
+ else
477
+ isconstantop (op) || throw (LoopError (" Please file an issue with LoopVectorization.jl with a reproducer; tried to eliminate a non-constant operation." ))
478
+ rangesym = name (op)
479
+ end
480
+ break
455
481
end
456
482
end
457
- @assert rangesym ≢ ind
483
+ @assert foundind
458
484
if rangesym === Symbol (" " ) # there is no rangesym, must be statically sized.
459
485
pushgespsym! (gespinds, gespsymbol, constoffset)
460
486
else
@@ -518,32 +544,32 @@ function use_loop_induct_var!(
518
544
vptrar = vptr (ar)
519
545
# @show ar
520
546
Wisz = false # ls.vector_width == 0
521
- for i ∈ eachindex (li)
547
+ for (i,isli) ∈ enumerate (li)
522
548
ii = i + offset
523
549
ind = indices[ii]
524
550
Wisz && push! (gespinds. args, staticexpr (0 )) # wrong for `@_avx`...
525
551
if ! li[i] # if it wasn't set
526
552
uliv[i] = 0
527
553
push! (offsetprecalc_descript. args, 0 )
528
- Wisz || pushgespind! (gespinds, ls, Symbol (" " ), 0 , ind, li, i , true , false )
554
+ Wisz || pushgespind! (gespinds, ls, Symbol (" " ), 0 , ind, isli , true , false )
529
555
elseif ind === CONSTANTZEROINDEX
530
556
uliv[i] = 0
531
557
push! (offsetprecalc_descript. args, 0 )
532
- Wisz || pushgespind! (gespinds, ls, Symbol (" " ), 0 , ind, li, i , true , false )
558
+ Wisz || pushgespind! (gespinds, ls, Symbol (" " ), 0 , ind, isli , true , false )
533
559
elseif isbroadcast ||
534
560
((isone (ii) && (last (looporder) === ind)) && ! (otherindexunrolled (ls, ind, ar)) ||
535
561
multiple_with_name (vptrar, allarrayrefs)) ||
536
562
(iszero (ls. vector_width) && isstaticloop (getloop (ls, ind)))# ||
537
563
# Not doing normal offset indexing
538
564
uliv[i] = - findfirst (Base. Fix2 (=== ,ind), looporder):: Int
539
565
push! (offsetprecalc_descript. args, 0 ) # not doing offset indexing, so push 0
540
- Wisz || pushgespind! (gespinds, ls, Symbol (" " ), 0 , ind, li, i , true , false )
566
+ Wisz || pushgespind! (gespinds, ls, Symbol (" " ), 0 , ind, isli , true , false )
541
567
else
542
568
uliv[i] = findfirst (Base. Fix2 (=== ,ind), looporder):: Int
543
569
loop = getloop (ls, ind)
544
570
push! (offsetprecalc_descript. args, max (5 ,us. u₁+ 1 ,us. u₂+ 1 ))
545
571
use_offsetprecalc = true
546
- Wisz || pushgespind! (gespinds, ls, Symbol (" " ), 0 , ind, li, i , false , false )
572
+ Wisz || pushgespind! (gespinds, ls, Symbol (" " ), 0 , ind, isli , false , false )
547
573
end
548
574
# cases for pushgespind! and loopval!
549
575
# if !isloopval, same as before
0 commit comments