@@ -144,7 +144,12 @@ function lower_block(
144
144
# loopsym = mangletiledsym(order[n], us, n)
145
145
loopsym = order[n]
146
146
# push!(blockq.args, incrementloopcounter(us, n, loopsym, UF))
147
+ # if n > 1 || iszero(ls.align_loops[])
147
148
push! (blockq. args, incrementloopcounter (ls, us, n, UF))
149
+ # else
150
+ # loopsym = names(ls)[n]
151
+ # push!(blockq.args, Expr(:(=), loopsym, Expr(:call, lv(:vadd), loopsym, Symbol("##ALIGNMENT#STEP##"))))
152
+ # end
148
153
blockq
149
154
end
150
155
@@ -185,48 +190,43 @@ end
185
190
# Expr(:block, loopiteratesatleastonce(loop), q)
186
191
# end
187
192
# end
188
# Lower the loop at depth 1 into an expression that is unrolled by a fixed factor.
#
# The generated code first evaluates the unrolled termination condition:
#   * if it holds, it runs a `while` loop whose body is `UF` copies of the single-iteration
#     body (re-evaluating the unrolled condition at the end of each pass), followed by a
#     plain `while` loop on the single-step condition;
#   * otherwise it runs a loop over the single-iteration body that always executes the
#     body once before checking the single-step condition.
# Everything is wrapped in a `let` block whose bindings come from `startloop`.
#
# `loopsym` is accepted for interface compatibility but is not used here.
function lower_unroll_for_throughput(
    ls::LoopSet, us::UnrollSpecification, loop::Loop, loopsym::Symbol
)
    UF = 4  # unroll factor
    sl = startloop(ls, us, 1, UF)
    tcc = terminatecondition(ls, us, 1, false, 1)
    tcu = terminatecondition(ls, us, 1, false, UF)
    body = lower_block(ls, us, 1, false, 1)
    # For non-static loops the unrolled condition is wrapped in `expect`.
    if !isstaticloop(loop)
        tcu = expect(tcu)
    end

    # Unrolled path: UF copies of the body, then refresh the unrolled condition flag.
    termcondu = gensym(:maybetermu)
    unrolledbody = Expr(:block, fill(body, UF)..., Expr(:(=), termcondu, tcu))
    remainderloop = Expr(:while, tcc, body)
    unrolledloop = Expr(:block, Expr(:while, termcondu, unrolledbody), remainderloop)

    # Fallback path: the flag starts `true`, so the body runs before `tcc` is consulted.
    termcond = gensym(:maybeterm)
    guardedbody = Expr(:block, body, Expr(:(=), termcond, tcc))
    singleloop = Expr(
        :block,
        Expr(:(=), termcond, true),
        Expr(:while, termcond, guardedbody),
    )

    dispatch = Expr(:if, termcondu, unrolledloop, singleloop)
    q = Expr(:block, assume(tcc), Expr(:(=), termcondu, tcu), dispatch)
    return Expr(:let, sl, q)
end
193
+ # function lower_unroll_for_throughput(ls::LoopSet, us::UnrollSpecification, loop::Loop, loopsym::Symbol)
194
+ # UF = 4
195
+ # sl = startloop(ls, us, 1, UF)
196
+ # tcc = terminatecondition(ls, us, 1, false, 1)
197
+ # tcu = terminatecondition(ls, us, 1, false, UF)
198
+ # body = lower_block(ls, us, 1, false, 1)
199
+ # loopisstatic = isstaticloop(loop)
200
+ # tcu = loopisstatic ? tcu : expect(tcu)
201
+ # termcondu = gensym(:maybetermu)
202
+ # unrolledbody = Expr(:block)
203
+ # foreach(_ -> push!(unrolledbody.args, body), 1:UF)
204
+ # # q = Expr(
205
+ # # :block,
206
+ # # Expr(:while, tcu, unrolledbody),
207
+ # # Expr(:while, tcc, body)
208
+ # # )
209
+ # # return Expr(:let, sl, q)
210
+ # push!(unrolledbody.args, Expr(:(=), termcondu, tcu))
211
+ # unrolledloop = Expr(
212
+ # :block,
213
+ # Expr(:while, termcondu, unrolledbody),
214
+ # Expr(:while, tcc, body)
215
+ # )
216
+ # termcond = gensym(:maybeterm)
217
+ # singleloop = Expr(
218
+ # :block,
219
+ # Expr(:(=), termcond, true),
220
+ # Expr(:while, termcond, Expr(:block, body, Expr(:(=), termcond, tcc)))
221
+ # )
222
+ # q = Expr(
223
+ # :block,
224
+ # assume(tcc),
225
+ # Expr(:(=), termcondu, tcu),
226
+ # Expr(:if, termcondu, unrolledloop, singleloop)
227
+ # )
228
+ # Expr(:let, sl, q)
229
+ # end
230
230
231
231
function assume (ex)
232
232
Expr (:call , Expr (:(.), Expr (:(.), :LoopVectorization , QuoteNode (:SIMDPirates )), QuoteNode (:assume )), ex)
@@ -247,6 +247,22 @@ function loopiteratesatleastonce(loop::Loop, as::Bool = true)
247
247
# as ? assume(comp) : expect(comp)
248
248
assume (comp)
249
249
end
250
+ # @inline step_to_align(x, ::Val{W}) where {W} = step_to_align(pointer(x), Val{W}())
251
+ # @inline step_to_align(x::Ptr{T}, ::Val{W}) where {W,T} = vsub(W, reinterpret(Int, x) & (W - 1))
252
+ # function align_inner_loop_expr(ls::LoopSet, us::UnrollSpecification, loop::Loop)
253
+ # alignincr = Symbol("##ALIGNMENT#STEP##")
254
+ # looplength = gensym(:inner_loop_length)
255
+ # pushpreamble!(ls, Expr(:(=), looplength, looplengthexpr(loop)))
256
+ # vp = vptr(operations(ls)[ls.align_loops[]])
257
+ # align_step = Expr(:call, :min, Expr(:call, lv(:step_to_align), vp, VECTORWIDTHSYMBOL), looplength)
258
+ # Expr(
259
+ # :block,
260
+ # Expr(:(=), alignincr, align_step),
261
+ # maskexpr(alignincr),
262
+ # lower_block(ls, us, 1, true, 1)
263
+ # )
264
+ # end
265
+
250
266
function lower_no_unroll (ls:: LoopSet , us:: UnrollSpecification , n:: Int , inclmask:: Bool )
251
267
usorig = ls. unrollspecification[]
252
268
nisvectorized = isvectorized (us, n)
@@ -260,11 +276,14 @@ function lower_no_unroll(ls::LoopSet, us::UnrollSpecification, n::Int, inclmask:
260
276
sl = startloop (ls, us, n)
261
277
tc = terminatecondition (ls, us, n, inclmask, 1 )
262
278
body = lower_block (ls, us, n, inclmask, 1 )
263
- isstatic = isstaticloop (loop )
264
-
279
+ # align_loop = isone(n) & (ls.align_loops[] > 0 )
280
+ isstatic = isstaticloop (loop) # & (!align_loop)
265
281
if ! isstatic && (usorig. u₁ == us. u₁) && (usorig. u₂ == us. u₂) && ! inclmask
266
282
tc = expect (tc)
267
283
end
284
+ # q = if align_loop
285
+ # Expr(:block, align_inner_loop_expr(ls, us, loop), Expr(:while, tc, body))
286
+ # elseif nisvectorized
268
287
q = if nisvectorized
269
288
# Expr(:block, loopiteratesatleastonce(loop, true), Expr(:while, expect(tc), body))
270
289
Expr (:block , Expr (:while , tc, body))
@@ -283,12 +302,15 @@ function lower_no_unroll(ls::LoopSet, us::UnrollSpecification, n::Int, inclmask:
283
302
# push!(body.args, Expr(:||, expect(tc), Expr(:break)))
284
303
# Expr(:block, Expr(:while, true, body))
285
304
end
286
-
287
305
if nisvectorized
288
306
# tc = terminatecondition(loop, us, n, loopsym, true, 1)
289
307
tc = terminatecondition (ls, us, n, true , 1 )
290
308
body = lower_block (ls, us, n, true , 1 )
291
- isone (num_loops (ls)) && pushfirst! (body. args, definemask (loop))
309
+ if isone (num_loops (ls))
310
+ pushfirst! (body. args, definemask (loop))
311
+ # elseif align_loop
312
+ # pushfirst!(body.args, definemask_for_alignment_cleanup(loop))
313
+ end
292
314
push! (q. args, Expr (:if , tc, body))
293
315
end
294
316
Expr (:block , Expr (:let , sl, q))
571
593
function definemask (loop:: Loop )
572
594
if isstaticloop (loop)
573
595
maskexpr (length (loop))
574
- elseif loop. startexact && loop. starthint == 1
596
+ elseif loop. startexact && isone ( loop. starthint)
575
597
maskexpr (loop. stopsym)
576
598
else
577
599
lexpr = if loop. startexact
@@ -584,6 +606,14 @@ function definemask(loop::Loop)
584
606
maskexpr (lexpr)
585
607
end
586
608
end
609
# Build the mask expression used by the alignment-cleanup step of `loop`.
#
# The mask length is `(stop + 1) - loop.itersymbol`, assembled with the `vadd`/`vsub`
# helpers resolved through `lv` and handed to `maskexpr`:
#   * when `loop.stopexact` is true, the stop is the literal `loop.stophint`, so the
#     `+ 1` is folded in while building the expression;
#   * otherwise the stop is the symbol `loop.stopsym` and the `+ 1` is emitted as a
#     `vadd` call.
#
# Both branches read the iteration variable from `loop.itersymbol`; the exact-stop branch
# previously referred to `loop.itersym`, which did not match the other branch.
function definemask_for_alignment_cleanup(loop::Loop)
    stop_plus_one = if loop.stopexact
        loop.stophint + 1
    else
        Expr(:call, lv(:vadd), loop.stopsym, 1)
    end
    lexpr = Expr(:call, lv(:vsub), stop_plus_one, loop.itersymbol)
    return maskexpr(lexpr)
end
587
617
function define_eltype_vec_width! (q:: Expr , ls:: LoopSet , vectorized)
588
618
push! (q. args, Expr (:(= ), ELTYPESYMBOL, determine_eltype (ls)))
589
619
push! (q. args, Expr (:(= ), VECTORWIDTHSYMBOL, determine_width (ls, vectorized)))
0 commit comments