1
+ const NOpsType = Union{Int,Vector{Int}}
2
+
1
3
function Loop (ls:: LoopSet , ex:: Expr , sym:: Symbol , :: Type{<:AbstractUnitRange} )
2
4
ssym = String (sym)
3
5
start = gensym (ssym* " _loopstart" ); stop = gensym (ssym* " _loopstop" ); loopsym = gensym (ssym * " _loop" )
54
56
55
57
function ArrayReferenceMeta (
56
58
ls:: LoopSet , @nospecialize (ar:: ArrayRefStruct ), arraysymbolinds:: Vector{Symbol} ,
57
- opsymbols:: Vector{Symbol} , nopsv:: Vector{Int } , expandedv:: Vector{Bool}
59
+ opsymbols:: Vector{Symbol} , nopsv:: Vector{NOpsType } , expandedv:: Vector{Bool}
58
60
)
59
61
index_types = ar. index_types
60
62
indices = ar. indices
@@ -73,7 +75,14 @@ function ArrayReferenceMeta(
73
75
elseif index_types == ComputedIndex
74
76
opsym = opsymbols[ind]
75
77
if expandedv[ind]
76
- for j ∈ 0 : nopsv[ind]- 1
78
+ nops = nopsv[ind]
79
+ if isa (nops, Vector)
80
+ n = first (nops)
81
+ if all (isequal (n), nops)
82
+ nops = n
83
+ end
84
+ end
85
+ for j ∈ 0 : nops- 1
77
86
pushfirst! (index_vec, expandedopname (opsym, j))
78
87
pushfirst! (loopedindex, false )
79
88
end
@@ -144,7 +153,7 @@ function add_mref!(ls::LoopSet, ar::ArrayReferenceMeta, i::Int, ::Type{<:Abstrac
144
153
end
145
154
function create_mrefs! (
146
155
ls:: LoopSet , arf:: Vector{ArrayRefStruct} , as:: Vector{Symbol} , os:: Vector{Symbol} ,
147
- nopsv:: Vector{Int } , expanded:: Vector{Bool} , vargs
156
+ nopsv:: Vector{NOpsType } , expanded:: Vector{Bool} , vargs
148
157
)
149
158
mrefs = Vector {ArrayReferenceMeta} (undef, length (arf))
150
159
for i ∈ eachindex (arf)
@@ -230,14 +239,11 @@ function calcnops(ls::LoopSet, os::OperationStruct)
230
239
offsets = ls. loopsymbol_offsets
231
240
idxs = loopindex (ls, os. loopdeps, 0x04 ) # FIXME DRY
232
241
iszero (length (idxs)) && return 1
233
- Δidxs = map (i-> offsets[i+ 1 ]- offsets[i], idxs)
234
- nops = first (Δidxs)
235
- @assert all (isequal (nops), Δidxs)
236
- nops
242
+ return map (i-> offsets[i+ 1 ]- offsets[i], idxs)
237
243
end
238
- function isexpanded (ls:: LoopSet , ops:: Vector{OperationStruct} , nopsv:: Vector{Int } , i:: Int )
244
+ function isexpanded (ls:: LoopSet , ops:: Vector{OperationStruct} , nopsv:: Vector{NOpsType } , i:: Int )
239
245
nops = nopsv[i]
240
- isone (nops) && return false
246
+ (nops === 1 || nops == [ 1 ] ) && return false
241
247
os = ops[i]
242
248
optyp = optype (os)
243
249
if optyp == compute
@@ -250,7 +256,7 @@ function isexpanded(ls::LoopSet, ops::Vector{OperationStruct}, nopsv::Vector{Int
250
256
end
251
257
252
258
function add_op! (
253
- ls:: LoopSet , instr:: Instruction , ops:: Vector{OperationStruct} , nopsv:: Vector{Int } , expandedv:: Vector{Bool} , i:: Int ,
259
+ ls:: LoopSet , instr:: Instruction , ops:: Vector{OperationStruct} , nopsv:: Vector{NOpsType } , expandedv:: Vector{Bool} , i:: Int ,
254
260
mrefs:: Vector{ArrayReferenceMeta} , opsymbol, elementbytes:: Int
255
261
)
256
262
os = ops[i]
@@ -272,9 +278,15 @@ function add_op!(
272
278
push! (opoffsets, opoffsets[end ] + 1 )
273
279
return
274
280
end
281
+ if isa (nops, Vector)
282
+ n = first (nops)
283
+ if all (isequal (n), nops)
284
+ nops = n
285
+ end
286
+ end
275
287
# if expanded, optyp must be either loopvalue, or compute (with loopvalues in its ancestry, not cutoff by loads)
276
288
for offset = 0 : nops- 1
277
- sym = nops == 1 ? opsymbol : expandedopname (opsymbol, offset)
289
+ sym = nops === 1 ? opsymbol : expandedopname (opsymbol, offset)
278
290
op = Operation (
279
291
length (operations (ls)), sym, elementbytes, instr,
280
292
optyp, loopdependencies (ls, os, false , offset), reduceddependencies (ls, os, false , offset),
@@ -295,7 +307,7 @@ function add_parents_to_op!(ls::LoopSet, vparents::Vector{Operation}, up::Unsign
295
307
for j ∈ offsets[i]+ 1 : offsets[i+ 1 ] # if parents are expanded, add them all
296
308
pushfirst! (vparents, ops[j])
297
309
end
298
- end
310
+ end
299
311
else # if isexpanded
300
312
# Do we want to require that all Δidxs are equal?
301
313
# Because `CartesianIndex((2,3)) - 1` results in a `MethodError`, I think this is reasonable for now
@@ -318,15 +330,15 @@ function add_parents_to_ops!(ls::LoopSet, ops::Vector{OperationStruct}, constoff
318
330
pushpreamble! (ls, Expr (:(= ), instr. instr, Expr (:macrocall , Symbol (" @inbounds" ), LineNumberNode (@__LINE__ , Symbol (@__FILE__ )), Expr (:ref , :vargs , constoffset))))
319
331
end
320
332
elseif ! isloopvalue (op)
321
- add_parents_to_op! (ls, parents (op), ops[i]. parents, k, Δ)
333
+ add_parents_to_op! (ls, parents (op), ops[i]. parents, k, Δ)
322
334
end
323
335
end
324
336
end
325
337
constoffset
326
338
end
327
339
function add_ops! (
328
340
ls:: LoopSet , instr:: Vector{Instruction} , ops:: Vector{OperationStruct} , mrefs:: Vector{ArrayReferenceMeta} ,
329
- opsymbols:: Vector{Symbol} , constoffset:: Int , nopsv:: Vector{Int } , expandedv:: Vector{Bool} , elementbytes:: Int
341
+ opsymbols:: Vector{Symbol} , constoffset:: Int , nopsv:: Vector{NOpsType } , expandedv:: Vector{Bool} , elementbytes:: Int
330
342
)
331
343
# @show ls.loopsymbols ls.loopsymbol_offsets
332
344
for i ∈ eachindex (ops)
@@ -378,7 +390,7 @@ function avx_loopset(instr, ops, arf, AM, LPSYM, LB, vargs)
378
390
resize! (ls. loop_order, ls. loopsymbol_offsets[end ])
379
391
arraysymbolinds = gen_array_syminds (AM)
380
392
opsymbols = [gensym (:op ) for _ ∈ eachindex (ops)]
381
- nopsv = calcnops .( Ref (ls), ops)
393
+ nopsv = NOpsType[ calcnops (ls, op) for op in ops]
382
394
expandedv = [isexpanded (ls, ops, nopsv, i) for i ∈ eachindex (ops)]
383
395
mrefs = create_mrefs! (ls, arf, arraysymbolinds, opsymbols, nopsv, expandedv, vargs)
384
396
pushpreamble! (ls, Expr (:(= ), ls. T, Expr (:call , :promote_type , [Expr (:call , :eltype , vptr (mref)) for mref ∈ mrefs]. .. )))
417
429
ls = _avx_loopset (OPS. parameters, ARF. parameters, AM. parameters, LPSYM. parameters, LB. parameters, vargs)
418
430
avx_body (ls, UT)
419
431
end
420
-
421
-
422
-
0 commit comments