|
1 | 1 |
|
2 |
# One-byte tag recording how a single array index is encoded.
@enum IndexType::UInt8 begin
    NotAnIndex = 0
    LoopIndex = 1
    ComputedIndex = 2
    SymbolicIndex = 3
end

# OR the tag's byte value into an unsigned bit field. The result has whatever type
# `u | ::UInt8` promotes to, so a `UInt64` accumulator stays a `UInt64`.
function Base.:(|)(u::Unsigned, it::IndexType)
    return u | UInt8(it)
end

# Compare only the low byte of `u` with the tag's byte value; higher bits of `u` are
# ignored.
function Base.:(==)(u::Unsigned, it::IndexType)
    return (u % UInt8) == UInt8(it)
end
3 | 6 |
|
# Compact, bits-only description of one array reference.
#
# Both fields are bit fields built one byte per index (see the
# `ArrayRefStruct(ls, mref, arraysymbolinds)` constructor):
# - `index_types`: one `IndexType` tag per index.
# - `indices`: the id that belongs to the tag in the same byte position.
struct ArrayRefStruct
    index_types::UInt64
    indices::UInt64
end
|
8 |
| -tup_to_vec(t::NTuple{W,T}) where {W,T} = ntuple(Val(W)) do w @inbounds Core.VecElement(t[w]) end |
9 |
| -vec_to_tup(v::Vec{W,T}) where {W,T} = ntuple(Val(W)) do w @inbounds (v[w]).value end |
10 |
| -vec_to_tup(v::SVec{W,T}) where {W,T} = ntuple(Val(W)) do w @inbounds (v[w]) end |
11 |
| -function ArrayRefStruct(ls::LoopSet, mref::ArrayReferenceMeta) |
| 11 | + |
"""
    findindoradd!(v::Vector{T}, s::T) where {T}

Return the index of the first element of `v` that is `==` to `s`.

If no element matches, push `s` onto the end of `v` and return its new index,
`length(v)`. `v` is only mutated in that second case.
"""
function findindoradd!(v::Vector{T}, s::T) where {T}
    ind = findfirst(==(s), v)
    ind === nothing || return ind
    push!(v, s)
    return length(v)
end
"""
    ArrayRefStruct(ls::LoopSet, mref::ArrayReferenceMeta, arraysymbolinds::Vector{Symbol})

Pack the indices of `mref` into an `ArrayRefStruct`, one byte per index.

For every index, `index_types` receives an `IndexType` tag and `indices` receives the
matching id in the same byte position:

- loop index: `LoopIndex` and `getloopid(ls, ind)`;
- index produced by an operation: `ComputedIndex` and that operation's `identifier`;
- anything else: `SymbolicIndex` and the position of the symbol in `arraysymbolinds`
  (the symbol is appended to `arraysymbolinds` if it was not there yet).

Both fields are `UInt64`, so indices beyond the eighth shift the earliest ones out.
"""
function ArrayRefStruct(
    ls::LoopSet, mref::ArrayReferenceMeta, arraysymbolinds::Vector{Symbol}
)
    index_types = zero(UInt64)
    indices = zero(UInt64)
    indv = mref.ref.indices
    # We can discard that the array was considered discontiguous, as it should be
    # recovered from type information. The emptiness check keeps a reference without
    # indices from throwing in `first`.
    skipmarker = !isempty(indv) && first(indv) === Symbol("##DISCONTIGUOUSSUBARRAY##")
    start = 1 + skipmarker
    for (n, ind) ∈ enumerate(@view(indv[start:end]))
        index_types <<= 8
        indices <<= 8
        if mref.loopindex[n]
            index_types |= LoopIndex
            # Record which loop this index refers to; without it the tag alone cannot
            # identify the loop.
            indices |= getloopid(ls, ind)
        else
            # NOTE(review): the original read an unbound name `opdict`; this assumes
            # the symbol => operation dictionary is the `opdict` field of the LoopSet.
            # Confirm against the LoopSet definition.
            parent = get(ls.opdict, ind, nothing)
            if parent === nothing
                index_types |= SymbolicIndex
                indices |= findindoradd!(arraysymbolinds, ind)
            else
                index_types |= ComputedIndex
                indices |= identifier(parent)
            end
        end
    end
    return ArrayRefStruct(index_types, indices)
end
|
27 | 42 |
|
# Compact description of one operation.
#
# - `instruction`: the operation's instruction.
# - `loopdeps`, `reduceddeps`: loop ids packed 4 bits each (see `loopdeps_uint`).
# - `parents`: parent operation identifiers packed 8 bits each (see `parents_uint`).
# - `array`: id returned by `findmatchingarray`, or `0x00` when the operation does not
#   access memory.
struct OperationStruct
    instruction::Instruction
    loopdeps::UInt64
    reduceddeps::UInt64
    parents::UInt64
    array::UInt8
end
|
"""
    findmatchingarray(ls::LoopSet, array::Symbol)

Return, as a `UInt8`, the 1-based position of the first entry of
`ls.refs_aliasing_syms` whose `vptr` is `array`, or `0x00` if there is none.

The position is reduced modulo 256, matching a wrapping `UInt8` counter.
"""
function findmatchingarray(ls::LoopSet, array::Symbol)
    for (position, ref) ∈ enumerate(ls.refs_aliasing_syms)
        if vptr(ref) === array
            return position % UInt8
        end
    end
    return 0x00
end
|
45 |
# Number of 4-bit chunks of `u` in use, counted from the lowest chunk up to and
# including the highest non-zero one. `leading_zeros(u) >>> 2` alone is the number of
# *empty* leading chunks, so it is subtracted from the 16 chunks a `UInt64` holds.
# A packed id of zero in the leading position cannot be told apart from "no entry".
filled_4byte_chunks(u::UInt64) = 16 - (leading_zeros(u) >>> 2)

# Loop ids are packed 4 bits each (see `loopdeps_uint`).
num_loop_deps(os::OperationStruct) = filled_4byte_chunks(os.loopdeps)
# The field is named `reduceddeps`; `reduced_deps` does not exist on OperationStruct.
num_reduced_deps(os::OperationStruct) = filled_4byte_chunks(os.reduceddeps)
# Parents are packed 8 bits each (see `parents_uint`), so count 8-bit chunks here.
num_parents(os::OperationStruct) = 8 - (leading_zeros(os.parents) >>> 3)
|
49 | 62 |
|
50 |
"""
    loopdeps_uint(ls::LoopSet, loopsyms::Vector{Symbol})

Pack `getloopid(ls, d)` for every `d` in `loopsyms` into a `UInt64`, 4 bits per loop.

The accumulator is shifted left before each id is OR-ed in, so the first symbol ends up
in the highest occupied chunk and the last symbol in the lowest. An empty `loopsyms`
gives `zero(UInt64)`.
"""
function loopdeps_uint(ls::LoopSet, loopsyms::Vector{Symbol})
    return foldl(loopsyms; init = zero(UInt64)) do packed, d
        (packed << 4) | getloopid(ls, d)
    end
end
|
# Packed loop ids of the loops `op` depends on / is reduced over.
# Both forward to the `loopdeps_uint(ls, ::Vector{Symbol})` method, which does the
# 4-bit packing; the previously called `shifted_loopset` is not defined in this file.
# NOTE(review): assumes `loopdependencies(op)` and `reduceddependencies(op)` return a
# `Vector{Symbol}` — confirm against the Operation accessors.
loopdeps_uint(ls::LoopSet, op::Operation) = loopdeps_uint(ls, loopdependencies(op))
reduceddeps_uint(ls::LoopSet, op::Operation) = loopdeps_uint(ls, reduceddependencies(op))
"""
    parents_uint(ls::LoopSet, op::Operation)

Pack the `identifier` of every parent of `op` into a `UInt64`, 8 bits per parent.

The accumulator is shifted left before each identifier is OR-ed in, so the first
parent ends up in the highest occupied byte. `ls` is not used.
"""
function parents_uint(ls::LoopSet, op::Operation)
    p = zero(UInt64)
    for parent ∈ parents(op)
        p <<= 8
        # Pack the parent's identifier, not `op`'s own.
        p |= identifier(parent)
    end
    return p
end
"""
    OperationStruct(ls::LoopSet, op::Operation)

Build the compact description of `op`: its instruction, packed loop dependencies,
packed reduced dependencies, packed parent identifiers, and the id of the array it
accesses (`0x00` when `accesses_memory(op)` is false).
"""
function OperationStruct(ls::LoopSet, op::Operation)
    return OperationStruct(
        instruction(op),
        loopdeps_uint(ls, op),
        reduceddeps_uint(ls, op),
        parents_uint(ls, op),
        accesses_memory(op) ? findmatchingarray(ls, vptr(op.ref)) : 0x00,
    )
end
|
63 | 91 | ## turn a LoopSet into a type object which can be used to reconstruct the LoopSet.
|
64 | 92 |
|
65 | 93 |
|
"""
    loop_boundaries(ls::LoopSet)

Return a `:tuple` expression with one range-constructing expression per loop in
`ls.loops`:

- both bounds exact: `StaticUnitRange{starthint,stophint}()`;
- only the start exact: `StaticLowerUnitRange{starthint}(stopsym)`;
- only the stop exact: `StaticUpperUnitRange{stophint}(startsym)`;
- neither exact: `startsym:stopsym`.
"""
function loop_boundaries(ls::LoopSet)
    lbd = Expr(:tuple)
    for loop ∈ ls.loops
        startexact = loop.startexact
        stopexact = loop.stopexact
        lexpr = if startexact & stopexact
            Expr(:call, Expr(:curly, lv(:StaticUnitRange), loop.starthint, loop.stophint))
        elseif startexact
            Expr(:call, Expr(:curly, lv(:StaticLowerUnitRange), loop.starthint), loop.stopsym)
        elseif stopexact
            Expr(:call, Expr(:curly, lv(:StaticUpperUnitRange), loop.stophint), loop.startsym)
        else
            # A single call to `:`; wrapping it in another `:call` would try to call
            # the resulting range.
            Expr(:call, :(:), loop.startsym, loop.stopsym)
        end
        # `push!` has no method for `Expr`; elements go into its `args` vector.
        push!(lbd.args, lexpr)
    end
    return lbd
end
| 112 | + |
"""
    argmeta_and_costs_description(ls::LoopSet, arraysymbolinds)

Return a `Tuple{...}` type expression whose parameters are, in order:
`length(arraysymbolinds)`, then one nested `Tuple{...}` each for
`ls.outer_reductions`, the first element of every entry of `ls.preamble_symsym`,
`ls.preamble_symint`, `ls.preamble_symfloat`, `ls.preamble_zeros` and
`ls.preamble_ones`.
"""
function argmeta_and_costs_description(ls::LoopSet, arraysymbolinds)
    # Wrap a collection's elements as the parameters of a `Tuple{...}` expression.
    tupletype(params) = Expr(:curly, :Tuple, params...)
    return Expr(
        :curly,
        :Tuple,
        length(arraysymbolinds),
        tupletype(ls.outer_reductions),
        tupletype(first.(ls.preamble_symsym)),
        tupletype(ls.preamble_symint),
        tupletype(ls.preamble_symfloat),
        tupletype(ls.preamble_zeros),
        tupletype(ls.preamble_ones),
    )
end
| 125 | + |
66 | 126 | # Try to condense in type stable manner
|
67 |
"""
    generate_call(ls::LoopSet)

Build the expression for a call to `_avx!` that describes `ls`.

The first four arguments are the `Tuple{...}` of `OperationStruct`s, the `Tuple{...}`
of `ArrayRefStruct`s, the argument-metadata description, and the loop-bounds tuple.
They are followed by `vptr(ref)` for every entry of `ls.refs_aliasing_syms`, the last
element of every entry of `ls.preamble_symsym`, and the symbolic array indices
collected while building the `ArrayRefStruct`s.
"""
function generate_call(ls::LoopSet)
    operation_descriptions = Expr(:curly, :Tuple)
    for op ∈ operations(ls)
        push!(operation_descriptions.args, OperationStruct(ls, op))
    end
    arraysymbolinds = Symbol[]
    arrayref_descriptions = Expr(:curly, :Tuple)
    for ref ∈ ls.refs_aliasing_syms
        push!(arrayref_descriptions.args, ArrayRefStruct(ls, ref, arraysymbolinds))
    end
    # Must come after the loop above, which is what fills `arraysymbolinds`.
    # The helper is defined as `argmeta_and_costs_description`; the previously used
    # `argmeta_and_consts_description` does not exist in this file.
    argmeta = argmeta_and_costs_description(ls, arraysymbolinds)
    loop_bounds = loop_boundaries(ls)

    q = Expr(
        :call, :_avx!, operation_descriptions, arrayref_descriptions, argmeta, loop_bounds
    )

    for ref ∈ ls.refs_aliasing_syms
        push!(q.args, vptr(ref))
    end
    for is ∈ ls.preamble_symsym
        push!(q.args, last(is))
    end
    append!(q.args, arraysymbolinds)
    return q
end
|
70 | 143 |
|
| 144 | + |
| 145 | + |
0 commit comments