@@ -117,7 +117,6 @@ function lower_load_no_optranslation!(
117
117
loopdeps = loopdependencies (op)
118
118
# @assert isvectorized(op)
119
119
opu₁ = isu₁unrolled (op)
120
-
121
120
u = ifelse (opu₁, u₁, 1 )
122
121
mvar = Symbol (variable_name (op, Core. ifelse (isu₂unrolled (op), suffix,- 1 )), ' _' , u)
123
122
falseexpr = Expr (:call , lv (:False )); rs = staticexpr (reg_size (ls))
@@ -195,22 +194,16 @@ function lower_load_for_optranslation!(
195
194
q:: Expr , op:: Operation , posindicator:: UInt8 , ls:: LoopSet , td:: UnrollArgs , mask:: Bool , translationind:: Int
196
195
)
197
196
@unpack u₁loop, u₂loop, vloop, u₁, u₂max, suffix = td
198
-
199
197
# @unpack u₁, u₁loopsym, u₂loopsym, vloopsym, u₂max, suffix = td
200
198
iszero (suffix) || return
201
-
202
199
total_unroll = u₁ + u₂max - 1
203
-
204
-
205
200
mref = op. ref
206
201
inds_by_ptroff = indices_calculated_by_pointer_offsets (ls, mref)
207
202
# initial offset pointer
208
-
209
203
# Unroll directions can be + or -
210
204
# we want to start at minimum position.
211
205
step₁ = gethint (step (u₁loop))
212
206
step₂ = gethint (step (u₂loop))
213
-
214
207
# abs of steps are equal
215
208
equal_steps = (step₁ == step₂) ⊻ (posindicator ≠ 0x03 )
216
209
# @show step₁, step₂, posindicator, equal_steps
@@ -227,9 +220,7 @@ function lower_load_for_optranslation!(
227
220
end
228
221
end
229
222
push! (q. args, Expr (:(= ), gptr, Expr (:call , lv (:gesp ), ptr, gespinds)))
230
-
231
223
fill! (inds_by_ptroff, true )
232
-
233
224
@unpack ref, loopedindex = mref
234
225
indices = copy (getindices (ref))
235
226
# old_translation_index = indices[translationind]
@@ -262,9 +253,7 @@ function lower_load_for_optranslation!(
262
253
op. ref = mref
263
254
# loopedindex[translationind] = false
264
255
# indices[translationind] = old_translation_index
265
-
266
256
shouldbroadcast = (! isvectorized (op)) && any (isvectorized, children (op))
267
-
268
257
# now we need to assign the `Vec`s from the `VecUnroll` to the correct name.
269
258
variable_name_u = Symbol (variable_name (op, - 1 ), ' _' , total_unroll)
270
259
variable_name_data = Symbol (variable_name_u, " ##data##" )
@@ -399,46 +388,132 @@ function rejectinterleave(ls::LoopSet, op::Operation, vloop::Loop, idsformap::Su
399
388
end
400
389
(first (getindices (op)) === vloopsym) && (length (idsformap) ≠ first (getstrides (op)) * gethint (strd))
401
390
end
391
+ # function lower_load_collection_manual_u₁unroll!(
392
+ # q::Expr, ls::LoopSet, opidmap::Vector{Int},
393
+ # idsformap::SubArray{Tuple{Int,Int}, 1, Vector{Tuple{Int,Int}}, Tuple{UnitRange{Int}}, true},
394
+ # ua::UnrollArgs, mask::Bool, inds_calc_by_ptr_offset::Vector{Bool}, op::Operation
395
+ # )
396
+ # @unpack u₁, u₁loop, u₁loopsym, u₂loopsym, vloopsym, vloop, suffix = ua
397
+ # _mvar = mangledvar(op)
398
+ # op.mangledvariable = gensym!(ls,_mvar)
399
+ # for u ∈ 0:u₁-1
400
+ # lower_load_collection!(
401
+ # q, ls, opidmap, idsformap, ua, mask, inds_calc_by_ptr_offset
402
+ # )
403
+ # end
404
+ # op.mangledvariable = _mvar
405
+ # end
402
406
"""
    lower_load_collection!(q, ls, opidmap, idsformap, ua, mask, inds_calc_by_ptr_offset)

Append to `q.args` the expressions that load a group of operations through shared
`_vload` call(s) and then bind each member's piece to that operation's variable.

`idsformap` holds `(index, offset)` pairs; `opidmap[index]` selects the operation in
`operations(ls)`. The operation of the first pair supplies the pointer (`vptr`), the
indices, and the unroll/vectorization properties used for the whole group.

Two shapes of code are emitted:

- **Single load.** One `_vload` is emitted and field `1` of its result is stored under
  the collection name; member `i` is then read from it with `getfield(collection, i, false)`.
  When the operation is `u₁`-unrolled with `u₁ > 1`, the step of `u₁loop` is known, and
  `u₁loopsym` occurs exactly once in `getindicesonly(op)`, the indices are additionally
  wrapped in the `u₁` `unrolled_curly` descriptor.
- **Manual `u₁` unroll.** Used when the operation is `u₁`-unrolled with `u₁ > 1` but the
  condition above does not hold. One `_vload` is emitted per `u ∈ 0:u₁-1`, and for every
  member the `u₁` extracted pieces are collected into a tuple wrapped in `VecUnroll`.

Masking: `MASKSYMBOL` is added only when `mask && isvectorized(op)`. In the manual path
with `u₁loopsym === vloopsym`, only the last of the `u₁` loads is masked. The mask is
always placed before the trailing alignment flag and register size arguments.

Returns `nothing`.
"""
function lower_load_collection!(
    q::Expr, ls::LoopSet, opidmap::Vector{Int},
    idsformap::SubArray{Tuple{Int,Int}, 1, Vector{Tuple{Int,Int}}, Tuple{UnitRange{Int}}, true},
    ua::UnrollArgs, mask::Bool, inds_calc_by_ptr_offset::Vector{Bool}
)
    @unpack u₁, u₁loop, u₁loopsym, vloopsym, vloop, suffix = ua

    ops = operations(ls)
    nouter = length(idsformap)
    # idsformap contains (index, offset) pairs
    firstid = opidmap[first(first(idsformap))]
    op = ops[firstid]
    opindices = getindices(op)
    interleave = first(opindices) === vloopsym
    # construct dummy unrolled loop
    offset_dummy_loop = Loop(first(opindices), MaybeKnown(1), MaybeKnown(1024), MaybeKnown(1), Symbol(""), Symbol(""))
    unrollcurl₂ = unrolled_curly(op, nouter, offset_dummy_loop, vloop, mask, 1) # interleave always 1 here
    inds = mem_offset_u(op, ua, inds_calc_by_ptr_offset, false)
    falseexpr = Expr(:call, lv(:False))
    rs = staticexpr(reg_size(ls))

    # Decide whether the u₁ unroll can be folded into the index expression, or whether
    # one load per unrolled iteration has to be emitted by hand.
    manualunrollu₁ = false
    if isu₁unrolled(op) && u₁ > 1 # both unrolled
        # `count` (rather than `sum`) so an empty index list yields 0 instead of an error.
        if isknown(step(u₁loop)) && count(Base.Fix2(===, u₁loopsym), getindicesonly(op)) == 1
            # TODO: handle this better than using `rejectinterleave`
            interleaveval = interleave ? -nouter : 0
            unrollcurl₁ = unrolled_curly(op, u₁, u₁loop, vloop, mask, interleaveval)
            inds = Expr(:call, unrollcurl₁, inds)
        else
            manualunrollu₁ = true # u₁ > 1 already checked to reach here
        end
    end

    vp = vptr(op)
    loadexpr = Expr(:call, lv(:_vload), vp, Expr(:call, unrollcurl₂, inds))
    # not using `add_memory_mask!(storeexpr, op, ua, mask)` because we checked
    # `isconditionalmemop` earlier in `lower_load_collection!`
    u₁vectorized = u₁loopsym === vloopsym
    maskvectorized = mask && isvectorized(op)
    # When manually unrolling along the vectorized loop, only the final load is masked;
    # that mask is added inside the loop below.
    if maskvectorized && !(manualunrollu₁ & u₁vectorized)
        push!(loadexpr.args, MASKSYMBOL)
    end
    push!(loadexpr.args, falseexpr, rs)

    collectionname = Symbol(vp, "##collection##number#", firstid, "#", suffix, "##size##", nouter, "##u₁##", u₁)
    gf = GlobalRef(Core, :getfield)
    if manualunrollu₁
        masklast = maskvectorized & u₁vectorized
        # One tuple expression per member; each accumulates that member's u₁ pieces.
        extractedvs = Vector{Expr}(undef, length(idsformap))
        for i ∈ eachindex(extractedvs)
            extractedvs[i] = Expr(:tuple)
        end
        for u ∈ 0:u₁-1
            islast = (u + 1) == u₁
            collectionname_u = Symbol(collectionname, :_, u)
            if u ≠ 0
                inds = mem_offset_u(op, ua, inds_calc_by_ptr_offset, false, u)
                loadexpr = copy(loadexpr)
                loadexpr.args[3] = Expr(:call, unrollcurl₂, inds)
                if islast & masklast
                    # `falseexpr` and `rs` are already the last two arguments; the mask
                    # has to precede them, as it does when it is pushed up front.
                    insert!(loadexpr.args, length(loadexpr.args) - 1, MASKSYMBOL)
                end
            end
            # getfield to extract data from `VecUnroll` object, so we have a tuple
            push!(q.args, Expr(:(=), collectionname_u, Expr(:call, gf, loadexpr, 1)))
            for (i, (opid, _)) ∈ enumerate(idsformap)
                ext = extractedvs[i]
                push!(ext.args, Expr(:call, gf, collectionname_u, i, false))
                if islast
                    # `ext` now holds all u₁ pieces for this member.
                    _op = ops[opidmap[opid]]
                    mvar = Symbol(variable_name(_op, Core.ifelse(isu₂unrolled(_op), suffix, -1)), '_', u₁)
                    push!(q.args, Expr(:(=), mvar, Expr(:call, lv(:VecUnroll), ext)))
                end
            end
        end
    else
        # getfield to extract data from `VecUnroll` object, so we have a tuple
        push!(q.args, Expr(:(=), collectionname, Expr(:call, gf, loadexpr, 1)))
        u = Core.ifelse(isu₁unrolled(op), u₁, 1)
        for (i, (opid, _)) ∈ enumerate(idsformap)
            _op = ops[opidmap[opid]]
            mvar = Symbol(variable_name(_op, Core.ifelse(isu₂unrolled(_op), suffix, -1)), '_', u)
            push!(q.args, Expr(:(=), mvar, Expr(:call, gf, collectionname, i, false)))
        end
    end
    return nothing
end
503
+ # function unpack_collection!(
504
+ # q::Expr, ls::LoopSet, opidmap::Vector{Int},
505
+ # idsformap::SubArray{Tuple{Int,Int}, 1, Vector{Tuple{Int,Int}}, Tuple{UnitRange{Int}}, true},
506
+ # ua::UnrollArgs, loadexpr::Expr, collectionname::Symbol, op::Operation
507
+ # )
508
+ # gf = GlobalRef(Core,:getfield)
509
+ # push!(q.args, Expr(:(=), collectionname, Expr(:call, gf, loadexpr, 1)))
510
+ # # getfield to extract data from `VecUnroll` object, so we have a tuple
511
+ # u = Core.ifelse(isu₁unrolled(op), u₁, 1)
512
+ # for (i,(opid,o)) ∈ enumerate(idsformap)
513
+ # extractedv = Expr(:call, gf, collectionname, i, false)
514
+
515
+ # _op = ops[opidmap[opid]]
516
+ # mvar = Symbol(variable_name(_op, Core.ifelse(isu₂unrolled(_op), suffix, -1)), '_', u)
517
+ # push!(q.args, Expr(:(=), mvar, extractedv))
518
+ # end
519
+ # end
0 commit comments