@@ -216,7 +216,7 @@ function outer_reduct_combine_expressions(ls::LoopSet, retv)
216
216
mvar = mangledvar (op)
217
217
instr = instruction (op)
218
218
out = Symbol (mvar, " ##onevec##" )
219
- instrcall = callexp (instr)
219
+ instrcall = callexpr (instr)
220
220
push! (instrcall. args, Expr (:call , lv (:vecmemaybe ), out))
221
221
if length (ls. outer_reductions) > 1
222
222
push! (instrcall. args, Expr (:call , lv (:vecmemaybe ), Expr (:call , GlobalRef (Core, :getfield ), Symbol (" #load#thread#ret#" ), i, false )))
@@ -344,7 +344,8 @@ function thread_one_loops_expr(
344
344
else
345
345
nothing
346
346
end
347
- iterdef = define_block_size (threadedloop, vloop, tn, ls. vector_width[])
347
+ # @unpack u₁loop, u₂loop, vloop, u₁, u₂max = ua
348
+ iterdef = define_block_size (threadedloop, ua. vloop, 0 , ls. vector_width[])
348
349
q = quote
349
350
var"#nthreads#" = $ choose_nthread # UInt
350
351
$ define_len
@@ -562,7 +563,10 @@ function valid_thread_loops(ls::LoopSet)
562
563
order, u₁loop, u₂loop, vectorized, u₁, u₂, c, shouldinline = choose_order_cost (ls)
563
564
# NOTE: `names` are being placed in the opposite order here versus normal lowering!
564
565
copyto! (names (ls), order); init_loop_map! (ls)
565
- ua = UnrollArgs (getloop (ls, u₁loop), getloop (ls, u₂loop), getloop (ls, vectorized), u₁, u₂, u₂)
566
+ u₁loop = getloop (ls, u₁loop)
567
+ _u₂loop = getloopid_or_nothing (ls, u₂loop)
568
+ u₂loop = _u₂loop === nothing ? u₁loop : _u₂loop
569
+ ua = UnrollArgs (u₁loop, u₂loop, getloop (ls, vectorized), u₁, u₂, u₂)
566
570
valid_thread_loop = fill (true , length (order))
567
571
for op ∈ operations (ls)
568
572
if isstore (op) && (length (reduceddependencies (op)) > 0 )
0 commit comments