@@ -208,13 +208,13 @@ end
208208# If you change the number of arguments here, make commensurate changes
209209# to the `insert!` locations in `setup_call_noinline`.
210210@generated function __avx__! (
211- :: Val{UT } , :: Type{OPS} , :: Type{ARF} , :: Type{AM} , :: Type{LPSYM} , lb:: LB ,
211+ :: Val{UNROLL } , :: Type{OPS} , :: Type{ARF} , :: Type{AM} , :: Type{LPSYM} , lb:: LB ,
212212 :: Val{AR} , :: Val{D} , :: Val{IND} , subsetvals, arraydescript, vargs:: Vararg{<:Any,N}
213- ) where {UT , OPS, ARF, AM, LPSYM, LB, N, AR, D, IND}
213+ ) where {UNROLL , OPS, ARF, AM, LPSYM, LB, N, AR, D, IND}
214214 1 + 1
215215 num_vptrs = length (ARF. parameters):: Int
216216 vptrs = [gensym (:vptr ) for _ ∈ 1 : num_vptrs]
217- call = Expr (:call , lv (:_avx_! ), Val {UT } (), OPS, ARF, AM, LPSYM, :lb )
217+ call = Expr (:call , lv (:_avx_! ), Val {UNROLL } (), OPS, ARF, AM, LPSYM, :lb )
218218 for n ∈ 1 : num_vptrs
219219 push! (call. args, vptrs[n])
220220 end
245245end
246246
247247# Try to condense in a type-stable manner
248- function generate_call (ls:: LoopSet , IUT , debug:: Bool = false )
248+ function generate_call (ls:: LoopSet , inline_unroll , debug:: Bool = false )
249249 operation_descriptions = Expr (:curly , :Tuple )
250250 varnames = Symbol[]
251251 for op ∈ operations (ls)
@@ -260,20 +260,20 @@ function generate_call(ls::LoopSet, IUT, debug::Bool = false)
260260 argmeta = argmeta_and_consts_description (ls, arraysymbolinds)
261261 loop_bounds = loop_boundaries (ls)
262262 loop_syms = Expr (:curly , :Tuple , map (QuoteNode, ls. loopsymbols)... )
263- inline, U, T = IUT
263+ inline, u₁, u₂ = inline_unroll
264264 if inline | debug
265265 func = debug ? lv (:_avx_loopset_debug ) : lv (:_avx_! )
266266 lbarg = debug ? Expr (:call , :typeof , loop_bounds) : loop_bounds
267267 q = Expr (
268- :call , func, Expr (:call , Expr (:curly , :Val , (U,T ))),
268+ :call , func, Expr (:call , Expr (:curly , :Val , (u₁, u₂ ))),
269269 operation_descriptions, arrayref_descriptions, argmeta, loop_syms, lbarg
270270 )
271271 debug && deleteat! (q. args, 2 )
272272 foreach (ref -> push! (q. args, vptr (ref)), ls. refs_aliasing_syms)
273273 else # not forcing inline; calling __avx__! which calls an inlined _avx_!
274274 arraydescript = Expr (:tuple )
275275 q = Expr (
276- :call , lv (:__avx__! ), Expr (:call , Expr (:curly , :Val , (U,T ))),
276+ :call , lv (:__avx__! ), Expr (:call , Expr (:curly , :Val , (u₁, u₂ ))),
277277 operation_descriptions, arrayref_descriptions, argmeta, loop_syms, loop_bounds, arraydescript
278278 )
279279 for array ∈ ls. includedactualarrays
@@ -405,15 +405,15 @@ function setup_call_debug(ls::LoopSet)
405405 pushpreamble! (ls, generate_call (ls, (true ,zero (Int8),zero (Int8)), true ))
406406 ls. preamble
407407end
# setup_call: entry point that picks between the inlined and the outlined call setup for `ls`.
# `inline` (default `true`) selects `setup_call_inline`; otherwise `setup_call_noinline` is used.
# The two trailing arguments (named `U`, `T` on the removed lines and `u₁`, `u₂` on the added
# lines) default to `zero(Int8)` and are forwarded unchanged to whichever helper is chosen.
# NOTE(review): the rename alongside `UT` -> `UNROLL` suggests these are unroll settings — confirm.
408- function setup_call (ls:: LoopSet , inline:: Bool = true , U = zero (Int8), T = zero (Int8))
408+ function setup_call (ls:: LoopSet , inline:: Bool = true , u₁ = zero (Int8), u₂ = zero (Int8))
409409 # We outline/inline at the macro level by creating/not creating an anonymous function.
410410 # The old API was instead based on inlining or not inlining the generated function, but
411411 # the generated function must be inlined into the initial loop preamble for performance reasons.
412412 # Creating an anonymous function and calling it also achieves the outlining, while still
413413 # inlining the generated function into the loop preamble.
414414 if inline
415- setup_call_inline (ls, U, T )
415+ setup_call_inline (ls, u₁, u₂ )
416416 else
417- setup_call_noinline (ls, U, T )
417+ setup_call_noinline (ls, u₁, u₂ )
418418 end
419419end
0 commit comments