@@ -418,12 +418,13 @@ function _avx_loopset(OPSsv, ARFsv, AMsv, LPSYMsv, LBsv, @nospecialize(vargs))
418
418
AMsv, LPSYMsv, LBsv, vargs
419
419
)
420
420
end
421
- const _body_ = Ref {Any} (nothing )
422
421
"""
423
- _avx_!(ut, ops, arf, am, lpsym, lb, vargs...)
422
+ _avx_!(unroll, ops, arf, am, lpsym, lb, vargs...)
424
423
425
424
Execute an `@avx` block. The block's code is represented via the arguments:
426
- - `ut` is `Val((U,T))`, where `U` is the unrolling factor and `T` ?has something to do with tiling?
425
+ - `unroll` is `Val((u₁,u₂))` and specifies the loop unrolling factor(s).
426
+ These values may be supplied manually via the `unroll` keyword
427
+ of [`@avx`](@ref).
427
428
- `ops` is `Tuple{mod1, sym1, op1, mod2, sym2, op2...}` encoding the operations of the loop.
428
429
`mod` and `sym` encode the module and symbol of the called function; `op` is an [`OperationStruct`](@ref)
429
430
encoding the details of the operation.
@@ -436,8 +437,8 @@ Execute an `@avx` block. The block's code is represented via the arguments:
436
437
`StaticLowerUnitRange(1)` because the lower bound of the iterator can be determined to be 1.
437
438
- `vargs...` holds the encoded pointers of all the arrays (see `VectorizationBase`'s various pointer types).
438
439
"""
439
- @generated function _avx_! (:: Val{UT } , :: Type{OPS} , :: Type{ARF} , :: Type{AM} , :: Type{LPSYM} , lb:: LB , vargs... ) where {UT , OPS, ARF, AM, LPSYM, LB}
440
+ @generated function _avx_! (:: Val{UNROLL } , :: Type{OPS} , :: Type{ARF} , :: Type{AM} , :: Type{LPSYM} , lb:: LB , vargs... ) where {UNROLL , OPS, ARF, AM, LPSYM, LB}
440
441
1 + 1 # Irrelevant line you can comment out/in to force recompilation...
441
442
ls = _avx_loopset (OPS. parameters, ARF. parameters, AM. parameters, LPSYM. parameters, LB. parameters, vargs)
442
- return _body_[] = copy ( avx_body (ls, UT) )
443
+ avx_body (ls, UNROLL )
443
444
end
0 commit comments