@@ -198,24 +198,24 @@ end
198
198
"""
    num_parameters(AM)

Return `AM[1]` (converted to `Int`) plus the number of entries in `AM[3]`.

The entries of `AM[2]` are not included in the count.
"""
function num_parameters(AM)
    base_count::Int = AM[1]
    return base_count + length(AM[3])
end
203
203
"""
    gen_array_syminds(AM)

Return a `Vector{Symbol}` holding one symbol per index `i ∈ 1:AM[1]`, each named
`##arraysymbolind##<i>#`. `AM[1]` must be an `Int`; an empty vector is returned
when it is zero or negative.
"""
function gen_array_syminds(AM)
    n = (AM[1])::Int
    # `"…" * i` has no method when `i` is an `Int`; `Symbol(args...)` concatenates
    # the string representations of its arguments, which is what is wanted here.
    return Symbol[Symbol("##arraysymbolind##", i, '#') for i ∈ 1:n]
end
206
206
"""
    process_metadata!(ls::LoopSet, AM, num_arrays::Int)

Populate the metadata fields of `ls` from `AM`, using `ls.operation_offsets`.

- `AM[2]` is forwarded to `expandbyoffset!` for `ls.outer_reductions`.
- For the `k`-th entry of `AM[3]` (each must be an `Int`), a fresh symbol is
  generated, `(offsets[entry] + 1, symbol)` is pushed onto `ls.preamble_symsym`,
  and the assignment `symbol = @inbounds vargs[num_arrays + k]` is added to the
  preamble via `pushpreamble!`.
- `AM[4]` through `AM[7]` are forwarded to `expandbyoffset!` for
  `preamble_symint`, `preamble_symfloat`, `preamble_zeros` and
  `preamble_funcofeltypes` respectively.

Returns `nothing`.
"""
function process_metadata!(ls::LoopSet, AM, num_arrays::Int)
    offsets = ls.operation_offsets
    expandbyoffset!(ls.outer_reductions, AM[2], offsets)
    for (k, entry) ∈ enumerate(AM[3])
        opindex = entry::Int
        sym = gensym(:symlicm)
        push!(ls.preamble_symsym, (offsets[opindex] + 1, sym))
        # Build `sym = @inbounds vargs[num_arrays + k]` by hand; the macrocall
        # expression needs a LineNumberNode as its second argument.
        here = LineNumberNode(@__LINE__, Symbol(@__FILE__))
        load = Expr(:macrocall, Symbol("@inbounds"), here, Expr(:ref, :vargs, num_arrays + k))
        pushpreamble!(ls, Expr(:(=), sym, load))
    end
    expandbyoffset!(ls.preamble_symint, AM[4], offsets)
    expandbyoffset!(ls.preamble_symfloat, AM[5], offsets)
    expandbyoffset!(ls.preamble_zeros, AM[6], offsets)
    expandbyoffset!(ls.preamble_funcofeltypes, AM[7], offsets)
    return nothing
end
221
221
function expandbyoffset! (indexpand:: Vector{T} , inds, offsets:: Vector{Int} , expand:: Bool = true ) where {T <: Union{Int,Tuple{Int,<:Any}} }
@@ -437,7 +437,7 @@ function sizeofeltypes(v, num_arrays)::Int
437
437
end
438
438
439
439
function avx_loopset (instr:: Vector{Instruction} , ops:: Vector{OperationStruct} , arf:: Vector{ArrayRefStruct} ,
440
- AM:: Core.SimpleVector , LPSYM:: Core.SimpleVector , LB:: Core.SimpleVector , @nospecialize (vargs))
440
+ AM:: Vector{Any} , LPSYM:: Vector{Any} , LB:: Core.SimpleVector , @nospecialize (vargs))
441
441
ls = LoopSet (:LoopVectorization )
442
442
num_arrays = length (arf)
443
443
elementbytes = sizeofeltypes (vargs, num_arrays)
@@ -464,18 +464,33 @@ function avx_body(ls::LoopSet, UNROLL::Tuple{Int8,Int8,Int8,Int})
464
464
q
465
465
end
466
466
467
"""
    _avx_loopset_debug(::Val{UNROLL}, ::Val{OPS}, ::Val{ARF}, ::Val{AM}, ::Val{LPSYM}, ::Type{LB}, vargs...)

Debugging entry point: print the encoded arguments with `@show`, build the loop
set with `_avx_loopset`, store the fourth element of `UNROLL` in
`vector_width[]` of the result, and return that loop set.
"""
function _avx_loopset_debug(::Val{UNROLL}, ::Val{OPS}, ::Val{ARF}, ::Val{AM}, ::Val{LPSYM}, ::Type{LB}, vargs...) where {UNROLL, OPS, ARF, AM, LPSYM, LB}
    @show OPS ARF AM LPSYM LB vargs
    # UNROLL is unpacked as (inline, u₁, u₂, W); only the width is needed here.
    _, _, _, width = UNROLL
    loopset = _avx_loopset(OPS, ARF, AM, LPSYM, LB.parameters, typeof.(vargs))
    loopset.vector_width[] = width
    return loopset
end
474
"""
    tovector(t)

Copy the elements `t[1], …, t[length(t)]` into a new `Vector{Any}`. Any element
that is itself a `Tuple` is converted recursively; all other elements are stored
unchanged.
"""
function tovector(@nospecialize(t))
    n = length(t)
    # The `isa` check (instead of dispatch) goes with `@nospecialize` —
    # presumably to limit specialization on the many possible tuple types.
    return Any[(item = t[i]; item isa Tuple ? tovector(item) : item) for i ∈ 1:n]
end
471
"""
    _avx_loopset(OPSsv, ARFsv, AMsv, LPSYMsv, LBsv::Core.SimpleVector, vargs)

Decode the encoded loop description and forward it to `avx_loopset`.

`OPSsv` is read in groups of three: the first two elements of each group are
passed to `Instruction`, and the third is stored in an `OperationStruct` vector.
`ARFsv` is collected into an `ArrayRefStruct` vector. `AMsv` and `LPSYMsv` are
converted with `tovector` before the call; `LBsv` and `vargs` are passed through
unchanged.
"""
function _avx_loopset(@nospecialize(OPSsv), @nospecialize(ARFsv), @nospecialize(AMsv), @nospecialize(LPSYMsv), LBsv::Core.SimpleVector, @nospecialize(vargs))
    op_count = length(OPSsv) ÷ 3
    # Group k occupies OPSsv[3k-2], OPSsv[3k-1] and OPSsv[3k].
    instructions = Instruction[Instruction(OPSsv[3k - 2], OPSsv[3k - 1]) for k ∈ 1:op_count]
    operations = OperationStruct[OPSsv[3k] for k ∈ 1:op_count]
    array_refs = ArrayRefStruct[ARFsv...]
    array_meta = tovector(AMsv)
    loop_syms = tovector(LPSYMsv)
    return avx_loopset(instructions, operations, array_refs, array_meta, loop_syms, LBsv, vargs)
end
481
496
"""
@@ -497,9 +512,9 @@ Execute an `@avx` block. The block's code is represented via the arguments:
497
512
`StaticLowerUnitRange(1)` because the lower bound of the iterator can be determined to be 1.
498
513
- `vargs...` holds the encoded pointers of all the arrays (see `VectorizationBase`'s various pointer types).
499
514
"""
500
- @generated function _avx_! (:: Val{UNROLL} , :: Type {OPS} , :: Type {ARF} , :: Type {AM} , :: Type {LPSYM} , lb:: LB , vargs... ) where {UNROLL, OPS, ARF, AM, LPSYM, LB}
515
+ @generated function _avx_! (:: Val{UNROLL} , :: Val {OPS} , :: Val {ARF} , :: Val {AM} , :: Val {LPSYM} , lb:: LB , vargs:: Tuple{Vararg{Any,K}} ) where {UNROLL, OPS, ARF, AM, LPSYM, LB, K }
501
516
# 1 + 1 # Irrelevant line you can comment out/in to force recompilation...
502
- ls = _avx_loopset (OPS. parameters , ARF. parameters , AM. parameters , LPSYM. parameters , LB. parameters, vargs)
517
+ ls = _avx_loopset (OPS, ARF, AM, LPSYM, LB. parameters, vargs. parameters )
503
518
# return @show avx_body(ls, UNROLL)
504
519
# @show UNROLL, OPS, ARF, AM, LPSYM, LB
505
520
avx_body (ls, UNROLL)
0 commit comments