3
3
# Bitwise-OR an unsigned integer with the `UInt8` conversion of an `IndexType`.
function Base.:|(u::Unsigned, it::IndexType)
    return u | UInt8(it)
end
4
4
# Compare an unsigned integer with an `IndexType`: `u` is first truncated to
# `UInt8` (`u % UInt8`), then compared against `UInt8(it)`.
function Base.:(==)(u::Unsigned, it::IndexType)
    return (u % UInt8) == UInt8(it)
end
5
5
6
"""
    StaticType{T}

Field-less struct that carries `T` purely as a type parameter.
Use [`gettype`](@ref) to read the parameter back.
"""
struct StaticType{T} end

"""
    gettype(::StaticType{T}) where {T}

Return the type parameter `T` of a `StaticType{T}` instance.
"""
@inline gettype(::StaticType{T}) where {T} = T
6
8
function _append_fields! (t:: Expr , body:: Expr , sym:: Symbol , :: Type{T} ) where {T}
7
9
gf = GlobalRef (Core,:getfield )
8
10
for f ∈ 1 : fieldcount (T)
@@ -13,7 +15,7 @@ function _append_fields!(t::Expr, body::Expr, sym::Symbol, ::Type{T}) where {T}
13
15
elseif fieldcount (TF) ≡ 0
14
16
push! (t. args, gfcall)
15
17
elseif TF <: DataType
16
- push! (t. args, Expr (:call , GlobalRef (Base, :Val ), gfcall))
18
+ push! (t. args, Expr (:call , Expr ( :curly , lv ( :StaticType ), gfcall) ))
17
19
else
18
20
newsym = gensym (sym)
19
21
push! (body. args, Expr (:(= ), newsym, gfcall))
31
33
elseif fieldcount (T) ≡ 0
32
34
push! (t. args, :r )
33
35
elseif T <: DataType
34
- push! (t. args, Expr (:call , GlobalRef (Base, :Val ), :r ))
36
+ push! (t. args, Expr (:call , Expr ( :curly , lv ( :StaticType ), :r ) ))
35
37
else
36
38
_append_fields! (t, body, :r , T)
37
39
end
@@ -49,7 +51,7 @@ function rebuild_fields(offset::Int, ::Type{T}) where {T}
49
51
elseif fieldcount (TF) ≡ 0
50
52
push! (call. args, Expr (:call , gf, :t , (offset += 1 ), false ))
51
53
elseif TF <: DataType
52
- push! (call. args, Expr (:call , GlobalRef (VectorizationBase, :unwrap ), Expr (:call , gf, :t , (offset += 1 ), false )))
54
+ push! (call. args, Expr (:call , lv ( :gettype ), Expr (:call , gf, :t , (offset += 1 ), false )))
53
55
else
54
56
arg, offset = rebuild_fields (offset, TF)
55
57
push! (call. args, arg)
63
65
elseif fieldcount (T) ≡ 0
64
66
call = Expr (:call , GlobalRef (Core,:getfield ), :t , 1 , false )
65
67
elseif T <: DataType
66
- call = Expr (:call , GlobalRef (VectorizationBase, :unwrap ), Expr (:call , GlobalRef (Core,:getfield ), :t , 1 , false ))
68
+ call = Expr (:call , lv ( :gettype ), Expr (:call , GlobalRef (Core,:getfield ), :t , 1 , false ))
67
69
else
68
70
call, _ = rebuild_fields (0 , T)
69
71
end
@@ -196,16 +198,16 @@ function getroots(ls::LoopSet)::Vector{Bool}
196
198
getroots! (rooted, ls)
197
199
end
198
200
"""
    getroots!(rooted::Vector{Bool}, ls::LoopSet)

Recompute `rooted` in place and return it.

`rooted` is first reset to all `false`. `recursively_set_parents_true!` is then
called for every operation indexed by `ls.outer_reductions` and for every
operation for which `isstore` is true. Finally `remove_outer_reducts!` is
applied unconditionally.
"""
function getroots!(rooted::Vector{Bool}, ls::LoopSet)
    fill!(rooted, false)
    ops = operations(ls)
    # Seed from the outer reductions; `ls.outer_reductions` holds indices into `ops`.
    for id in ls.outer_reductions
        recursively_set_parents_true!(rooted, ops[id])
    end
    # Seed from every store operation.
    for op in ops
        if isstore(op)
            recursively_set_parents_true!(rooted, op)
        end
    end
    remove_outer_reducts!(rooted, ls)
    return rooted
end
210
212
function OperationStruct! (varnames:: Vector{Symbol} , ids:: Vector{Int} , ls:: LoopSet , op:: Operation )
211
213
instr = instruction (op)
@@ -635,49 +637,51 @@ function generate_call_split(
635
637
end
636
638
return generate_call_types (ls, preserve, shouldindbyind, roots, extra_args, inlineu₁u₂, thread, debug)
637
639
end
640
+
638
641
# Try to condense in type stable manner
639
642
function generate_call_types (
640
643
ls:: LoopSet , preserve:: Vector{Symbol} , shouldindbyind:: Vector{Bool} , roots:: Vector{Bool} , extra_args:: Expr , (inline,u₁,u₂):: Tuple{Bool,Int8,Int8} , thread:: UInt , debug:: Bool
641
644
)
642
645
# good place to check for split
643
- operation_descriptions = Expr (:tuple )
644
- varnames = Symbol[]; ids = Vector {Int} (undef, length (operations (ls)))
645
- ops = operations (ls)
646
- for op ∈ ops
647
- instr:: Instruction = instruction (op)
648
- if (isconstant (op) && (instr == LOOPCONSTANT)) && (! roots[identifier (op)])
649
- instr = op. instruction = DROPPEDCONSTANT
650
- end
651
- push! (operation_descriptions. args, QuoteNode (instr. mod))
652
- push! (operation_descriptions. args, QuoteNode (instr. instr))
653
- push! (operation_descriptions. args, OperationStruct! (varnames, ids, ls, op))
654
- end
655
- arraysymbolinds = Symbol[]
656
- arrayref_descriptions = Expr (:tuple )
657
- duplicate_ref = fill (false , length (ls. refs_aliasing_syms))
658
- for (j,ref) ∈ enumerate (ls. refs_aliasing_syms)
659
- vpref = vptr (ref)
660
- # duplicate_ref[j] ≠ 0 && continue
661
- duplicate_ref[j] && continue
662
- push! (arrayref_descriptions. args, ArrayRefStruct (ls, ref, arraysymbolinds, ids))
663
- end
664
- argmeta = argmeta_and_consts_description (ls, arraysymbolinds)
665
- loop_bounds = loop_boundaries (ls, shouldindbyind)
666
- loop_syms = tuple_expr (QuoteNode, ls. loopsymbols)
667
- func = debug ? lv (:_avx_loopset_debug ) : lv (:_avx_! )
668
- lbarg = debug ? Expr (:call , :typeof , loop_bounds) : loop_bounds
669
- configarg = (inline,u₁,u₂,ls. isbroadcast,thread)
670
- unroll_param_tup = Expr (:call , lv (:avx_config_val ), :(Val {$configarg} ()), VECTORWIDTHSYMBOL)
671
- q = Expr (:call , func, unroll_param_tup, val (operation_descriptions), val (arrayref_descriptions), val (argmeta), val (loop_syms))
672
-
673
- add_reassigned_syms! (extra_args, ls) # counterpart to `add_ops!` constants
674
- for (opid,sym) ∈ ls. preamble_symsym # counterpart to process_metadata! symsym extraction
675
- if instruction (ops[opid]) ≠ DROPPEDCONSTANT
676
- push! (extra_args. args, sym)
677
- end
646
+ operation_descriptions = Expr (:tuple )
647
+ varnames = Symbol[]; ids = Vector {Int} (undef, length (operations (ls)))
648
+ ops = operations (ls)
649
+ for op ∈ ops
650
+ instr:: Instruction = instruction (op)
651
+ if (isconstant (op) && (instr == LOOPCONSTANT)) && (! roots[identifier (op)])
652
+ instr = op. instruction = DROPPEDCONSTANT
653
+ end
654
+ push! (operation_descriptions. args, QuoteNode (instr. mod))
655
+ push! (operation_descriptions. args, QuoteNode (instr. instr))
656
+ push! (operation_descriptions. args, OperationStruct! (varnames, ids, ls, op))
657
+ end
658
+ arraysymbolinds = Symbol[]
659
+ arrayref_descriptions = Expr (:tuple )
660
+ duplicate_ref = fill (false , length (ls. refs_aliasing_syms))
661
+ for (j,ref) ∈ enumerate (ls. refs_aliasing_syms)
662
+ vpref = vptr (ref)
663
+ # duplicate_ref[j] ≠ 0 && continue
664
+ duplicate_ref[j] && continue
665
+ push! (arrayref_descriptions. args, ArrayRefStruct (ls, ref, arraysymbolinds, ids))
666
+ end
667
+ argmeta = argmeta_and_consts_description (ls, arraysymbolinds)
668
+ loop_bounds = loop_boundaries (ls, shouldindbyind)
669
+ loop_syms = tuple_expr (QuoteNode, ls. loopsymbols)
670
+ func = debug ? lv (:_avx_loopset_debug ) : lv (:_avx_! )
671
+ lbarg = debug ? Expr (:call , :typeof , loop_bounds) : loop_bounds
672
+ configarg = (inline,u₁,u₂,ls. isbroadcast,thread)
673
+ unroll_param_tup = Expr (:call , lv (:avx_config_val ), :(Val {$configarg} ()), VECTORWIDTHSYMBOL)
674
+ q = Expr (:call , func, unroll_param_tup, val (operation_descriptions), val (arrayref_descriptions), val (argmeta), val (loop_syms))
675
+
676
+ add_reassigned_syms! (extra_args, ls) # counterpart to `add_ops!` constants
677
+ for (opid,sym) ∈ ls. preamble_symsym # counterpart to process_metadata! symsym extraction
678
+ if instruction (ops[opid]) ≠ DROPPEDCONSTANT
679
+ push! (extra_args. args, sym)
678
680
end
679
- append! (extra_args. args, arraysymbolinds) # add_array_symbols!
681
+ end
682
+ append! (extra_args. args, arraysymbolinds) # add_array_symbols!
680
683
add_external_functions! (extra_args, ls) # extract_external_functions!
684
+ add_outerreduct_types! (extra_args, ls) # extract_outerreduct_types!
681
685
if debug
682
686
vecwidthdefq = Expr (:block )
683
687
push! (q. args, Expr (:tuple , lbarg, extra_args))
@@ -686,17 +690,22 @@ function generate_call_types(
686
690
vecwidthdefq = Expr (:block , Expr (:(= ), vargsym, Expr (:tuple , lbarg, extra_args)))
687
691
push! (q. args, Expr (:call , GlobalRef (Base,:Val ), Expr (:call , GlobalRef (Base,:typeof ), vargsym)), Expr (:(... ), Expr (:call , lv (:flatten_to_tuple ), vargsym)))
688
692
end
689
- define_eltype_vec_width! (vecwidthdefq, ls, nothing )
690
- push! (vecwidthdefq. args, q)
691
- if debug
692
- pushpreamble! (ls,vecwidthdefq)
693
- Expr (:block , ls. prepreamble, ls. preamble)
694
- else
695
- setup_call_final (ls, setup_outerreduct_preserve (ls, vecwidthdefq, preserve))
696
- end
693
+ define_eltype_vec_width! (vecwidthdefq, ls, nothing , true )
694
+ push! (vecwidthdefq. args, q)
695
+ if debug
696
+ pushpreamble! (ls,vecwidthdefq)
697
+ Expr (:block , ls. prepreamble, ls. preamble)
698
+ else
699
+ setup_call_final (ls, setup_outerreduct_preserve (ls, vecwidthdefq, preserve))
700
+ end
701
+ end
702
+ # @inline reductinittype(::T) where {T} = StaticType{T}()
703
# Build the expression `Base.typeof(<name(op)>)` for operation `op`.
function typeof_expr(op::Operation)
    return Expr(:call, GlobalRef(Base, :typeof), name(op))
end
704
# Push one `typeof(...)` expression (see `typeof_expr`) onto `extra_args.args`
# for each operation indexed by `ls.outer_reductions`. Returns `nothing`.
function add_outerreduct_types!(extra_args::Expr, ls::LoopSet) # extract_outerreduct_types!
    for or in ls.outer_reductions
        push!(extra_args.args, typeof_expr(operations(ls)[or]))
    end
    return nothing
end
698
-
699
-
700
709
"""
701
710
check_args(::Vararg{AbstractArray})
702
711
0 commit comments