Skip to content

Commit bca8e97

Browse files
authored
Merge branch 'master' into jas/compat
2 parents 419db63 + 28f6ffc commit bca8e97

File tree

5 files changed

+202
-31
lines changed

5 files changed

+202
-31
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ Parameters = "0"
1717
SIMDPirates = "0"
1818
SLEEFPirates = "0"
1919
VectorizationBase = "0"
20-
julia = "1"
20+
julia = "1.3"
2121

2222
[extras]
2323
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

src/condense_loopset.jl

Lines changed: 101 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,70 +1,145 @@
11

2-
@enum IndexType::UInt8 NotAnIndex=0 LoopIndex=1 ComputedIndex=2 SymbolicIndex=3 LiteralIndex=4
2+
# Classification of a single array index. One `IndexType` byte per index is
# packed into the `UInt64` words built by the `ArrayRefStruct` constructor.
@enum IndexType::UInt8 begin
    NotAnIndex = 0
    LoopIndex = 1
    ComputedIndex = 2
    SymbolicIndex = 3
end

# OR the enum's byte value into a packed unsigned word.
Base.:|(u::Unsigned, it::IndexType) = u | UInt8(it)
# Compare only the lowest byte of `u` against the enum's byte value; any
# higher bits of `u` are deliberately ignored by the `% UInt8` truncation.
Base.:(==)(u::Unsigned, it::IndexType) = (u % UInt8) == UInt8(it)
36

47
"""
    ArrayRefStruct

Packed description of one array reference, made of two `UInt64` words.

# Fields
- `index_types`: one `IndexType` byte per index. The constructor shifts left by
  8 bits before adding each index, so the last index sits in the lowest byte.
- `indices`: one identifier byte per index, packed in the same order as
  `index_types`.
"""
struct ArrayRefStruct
    index_types::UInt64
    indices::UInt64
end
8-
tup_to_vec(t::NTuple{W,T}) where {W,T} = ntuple(Val(W)) do w @inbounds Core.VecElement(t[w]) end
9-
vec_to_tup(v::Vec{W,T}) where {W,T} = ntuple(Val(W)) do w @inbounds (v[w]).value end
10-
vec_to_tup(v::SVec{W,T}) where {W,T} = ntuple(Val(W)) do w @inbounds (v[w]) end
11-
function ArrayRefStruct(ls::LoopSet, mref::ArrayReferenceMeta)
11+
12+
"""
    findindoradd!(v::Vector{T}, s::T) where {T}

Return the index of the first element of `v` that is `==` to `s`. If no element
matches, `s` is appended to `v` and the index of the new last element is
returned.
"""
function findindoradd!(v::Vector{T}, s::T) where {T}
    ind = findfirst(==(s), v)
    ind === nothing || return ind
    push!(v, s)
    return length(v)
end
18+
"""
    ArrayRefStruct(ls::LoopSet, mref::ArrayReferenceMeta, arraysymbolinds::Vector{Symbol})

Pack the indices of `mref` into an `ArrayRefStruct`.

Each index contributes one byte to `index_types` (its `IndexType`) and one byte
to `indices`:
- loop index: the loop id from `getloopid(ls, ind)`;
- computed index (an operation is registered under that symbol): `identifier`
  of that operation;
- symbolic index: the position of the symbol in `arraysymbolinds`, which is
  extended in place when the symbol is new.
"""
function ArrayRefStruct(ls::LoopSet, mref::ArrayReferenceMeta, arraysymbolinds::Vector{Symbol})
    index_types = zero(UInt64)
    indices = zero(UInt64)
    indv = mref.ref.indices
    # we can discard that the array was considered discontiguous, as it should be recovered from type information
    start = 1 + (first(indv) === Symbol("##DISCONTIGUOUSSUBARRAY##"))
    for (n, ind) in enumerate(@view(indv[start:end]))
        index_types <<= 8
        indices <<= 8
        if mref.loopindex[n]
            index_types |= LoopIndex
            # Record which loop this is; the reconstruction side reads this
            # byte back as an index into `ls.loopsymbols`.
            indices |= getloopid(ls, ind)
        else
            # NOTE(review): the original referenced a bare, undefined `opdict`;
            # this assumes the symbol => operation dictionary is stored on the
            # LoopSet as `ls.opdict` — confirm against the LoopSet definition.
            parent = get(ls.opdict, ind, nothing)
            if parent === nothing
                index_types |= SymbolicIndex
                indices |= findindoradd!(arraysymbolinds, ind)
            else
                index_types |= ComputedIndex
                indices |= identifier(parent)
            end
        end
    end
    return ArrayRefStruct(index_types, indices)
end
2742

2843
"""
    OperationStruct

Packed description of one operation.

# Fields
- `instruction`: the operation's instruction.
- `loopdeps`: loop ids of the loop dependencies, 4 bits each (see `loopdeps_uint`).
- `reduceddeps`: loop ids of the reduced dependencies, 4 bits each.
- `parents`: identifiers of the parent operations, 8 bits each (see `parents_uint`).
- `array`: 1-based id of the referenced array as returned by
  `findmatchingarray`, or `0x00` when the operation does not access memory.
"""
struct OperationStruct
    instruction::Instruction
    loopdeps::UInt64
    reduceddeps::UInt64
    parents::UInt64
    array::UInt8
end
3550
"""
    findmatchingarray(ls::LoopSet, array::Symbol)

Return, as a `UInt8`, the 1-based position of the first entry of
`ls.refs_aliasing_syms` whose `vptr` is identical to `array`, or `0x00` when
no entry matches.
"""
function findmatchingarray(ls::LoopSet, array::Symbol)
    id = 0x01
    for as in ls.refs_aliasing_syms
        vptr(as) === array && return id
        id += 0x01
    end
    return 0x00
end
45-
filled_4byte_chunks(u::UInt64) = leading_zeros(u) >> 2
58+
# Number of occupied 4-bit chunks in `u`, counted from the lowest chunk up to
# and including the highest non-zero one. (Despite the name, chunks are 4 bits
# wide.) `leading_zeros(u) >>> 2` is the number of *empty* leading chunks, so
# it is subtracted from the 16 chunks a UInt64 holds.
filled_4byte_chunks(u::UInt64) = 16 - (leading_zeros(u) >>> 2)
4659
# Entry counts of the packed dependency / parent words of an OperationStruct,
# as reported by `filled_4byte_chunks`.
num_loop_deps(os::OperationStruct) = filled_4byte_chunks(os.loopdeps)
# The field is spelled `reduceddeps` in the struct definition.
num_reduced_deps(os::OperationStruct) = filled_4byte_chunks(os.reduceddeps)
num_parents(os::OperationStruct) = filled_4byte_chunks(os.parents)
4962

50-
function loodeps_uint(ls::LoopSet, op::Operation)
63+
"""
    loopdeps_uint(ls::LoopSet, loopsyms::Vector{Symbol})

Pack the loop ids of `loopsyms` into a `UInt64`, 4 bits per loop. The first
symbol ends up in the highest occupied chunk and the last symbol in the lowest.
"""
function loopdeps_uint(ls::LoopSet, loopsyms::Vector{Symbol})
    # 16 - (leading_zeros(ld) >>> 2) yields the number of packed loop ids
    ld = zero(UInt64)
    for d in loopsyms
        ld <<= 4
        ld |= getloopid(ls, d)
    end
    return ld
end
71+
# Packed loop dependencies / reduced dependencies of an operation. Both forward
# to the `Vector{Symbol}` method of `loopdeps_uint`, which does the 4-bit
# packing (the original called `shifted_loopset`, which this file never defines).
loopdeps_uint(ls::LoopSet, op::Operation) = loopdeps_uint(ls, loopdependencies(op))
reduceddeps_uint(ls::LoopSet, op::Operation) = loopdeps_uint(ls, reduceddependencies(op))
73+
"""
    parents_uint(ls::LoopSet, op::Operation)

Pack the `identifier` of every parent of `op` into a `UInt64`, 8 bits per
parent, in the order returned by `parents(op)` (last parent in the lowest byte).
"""
function parents_uint(ls::LoopSet, op::Operation)
    p = zero(UInt64)
    for parent in parents(op)
        p <<= 8
        # Pack the parent's identifier, not the identifier of `op` itself.
        p |= identifier(parent)
    end
    return p
end
5881
"""
    OperationStruct(ls::LoopSet, op::Operation)

Build the packed description of `op`: its instruction, packed loop and reduced
dependencies, packed parent identifiers, and the id of the array it references
(`0x00` when `accesses_memory(op)` is false).
"""
function OperationStruct(ls::LoopSet, op::Operation)
    instr = instruction(op)
    ld = loopdeps_uint(ls, op)
    rd = reduceddeps_uint(ls, op)
    p = parents_uint(ls, op)
    array = accesses_memory(op) ? findmatchingarray(ls, vptr(op.ref)) : 0x00
    return OperationStruct(instr, ld, rd, p, array)
end
6391
## turn a LoopSet into a type object which can be used to reconstruct the LoopSet.
6492

6593

94+
"""
    loop_boundaries(ls::LoopSet)

Return a `:tuple` expression with one range expression per loop in `ls.loops`:
- both bounds exact: `StaticUnitRange{starthint,stophint}()`;
- only the start exact: `StaticLowerUnitRange{starthint}(stopsym)`;
- only the stop exact: `StaticUpperUnitRange{stophint}(startsym)`;
- neither exact: `startsym:stopsym`.
"""
function loop_boundaries(ls::LoopSet)
    lbd = Expr(:tuple)
    for loop in ls.loops
        startexact = loop.startexact
        stopexact = loop.stopexact
        lexpr = if startexact && stopexact
            Expr(:call, Expr(:curly, lv(:StaticUnitRange), loop.starthint, loop.stophint))
        elseif startexact
            Expr(:call, Expr(:curly, lv(:StaticLowerUnitRange), loop.starthint), loop.stopsym)
        elseif stopexact
            Expr(:call, Expr(:curly, lv(:StaticUpperUnitRange), loop.stophint), loop.startsym)
        else
            # A plain `start:stop` call; wrapping it in a second `:call` would
            # try to call the resulting range.
            Expr(:call, :(:), loop.startsym, loop.stopsym)
        end
        # Elements of an Expr live in its `args` vector.
        push!(lbd.args, lexpr)
    end
    return lbd
end
112+
113+
"""
    argmeta_and_costs_description(ls::LoopSet, arraysymbolinds)

Build a `Tuple{...}` type expression carrying, in order: the number of symbolic
array indices, the outer reductions, the first element of every
`ls.preamble_symsym` entry, and the contents of `ls.preamble_symint`,
`ls.preamble_symfloat`, `ls.preamble_zeros` and `ls.preamble_ones`.
"""
function argmeta_and_costs_description(ls::LoopSet, arraysymbolinds)
    return Expr(
        :curly, :Tuple,
        length(arraysymbolinds),
        Expr(:curly, :Tuple, ls.outer_reductions...),
        Expr(:curly, :Tuple, first.(ls.preamble_symsym)...),
        Expr(:curly, :Tuple, ls.preamble_symint...),
        Expr(:curly, :Tuple, ls.preamble_symfloat...),
        Expr(:curly, :Tuple, ls.preamble_zeros...),
        Expr(:curly, :Tuple, ls.preamble_ones...)
    )
end
# Forwarding method under the "consts" spelling, so callers using either
# spelling resolve to the same implementation.
argmeta_and_consts_description(ls::LoopSet, arraysymbolinds) =
    argmeta_and_costs_description(ls, arraysymbolinds)
125+
66126
# Try to condense in type stable manner
67-
function condense_operations(ls::LoopSet)
68-
127+
"""
    generate_call(ls::LoopSet)

Build the expression `_avx!(OPS, ARF, AM, bounds, args...)` for `ls`, where
- `OPS` is a `Tuple{...}` of `OperationStruct`s, one per operation;
- `ARF` is a `Tuple{...}` of `ArrayRefStruct`s, one per entry of
  `ls.refs_aliasing_syms`;
- `AM` is the metadata tuple from `argmeta_and_costs_description`;
- `bounds` is the tuple expression from `loop_boundaries`;
- `args` are the `vptr` of every array reference, then the last element of
  every `ls.preamble_symsym` entry, then the symbolic array indices.
"""
function generate_call(ls::LoopSet)
    operation_descriptions = Expr(:curly, :Tuple)
    for op in operations(ls)
        push!(operation_descriptions.args, OperationStruct(ls, op))
    end
    arraysymbolinds = Symbol[]
    arrayref_descriptions = Expr(:curly, :Tuple)
    for ref in ls.refs_aliasing_syms
        push!(arrayref_descriptions.args, ArrayRefStruct(ls, ref, arraysymbolinds))
    end
    # Must run after the ArrayRefStructs are built: that step fills
    # `arraysymbolinds`. The helper is defined in this file under the spelling
    # `argmeta_and_costs_description`.
    argmeta = argmeta_and_costs_description(ls, arraysymbolinds)
    loop_bounds = loop_boundaries(ls)

    q = Expr(:call, :_avx!, operation_descriptions, arrayref_descriptions, argmeta, loop_bounds)

    for ref in ls.refs_aliasing_syms
        push!(q.args, vptr(ref))
    end
    for is in ls.preamble_symsym
        push!(q.args, last(is))
    end
    append!(q.args, arraysymbolinds)
    return q
end
70143

144+
145+

src/graphs.jl

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,9 @@ isdense(::Type{<:DenseArray}) = true
3434

3535

3636
# For passing options like array types and mask
37-
struct LoopSetOptions
37+
# struct LoopSetOptions
3838

39-
end
39+
# end
4040

4141
struct Loop
4242
itersymbol::Symbol
@@ -141,7 +141,7 @@ Base.@propagate_inbounds Base.getindex(lo::LoopOrder, i::Int) = lo.oporder[i]
141141
# Multi-index access: convert `i...` to a linear position over `size(lo)` and
# read the corresponding entry of `lo.oporder`.
Base.@propagate_inbounds function Base.getindex(lo::LoopOrder, i...)
    linear = LinearIndices(size(lo))
    return lo.oporder[linear[i...]]
end
142142

143143
# Must make it easy to iterate
144-
# outer_reductions is a vector of indixes (within operation vectors) of the reduction operation, eg the vmuladd op in a dot product
144+
# outer_reductions is a vector of indices (within operation vectors) of the reduction operation, eg the vmuladd op in a dot product
145145
struct LoopSet
146146
loopsymbols::Vector{Symbol}
147147
loops::Vector{Loop}
@@ -234,6 +234,7 @@ function LoopSet()
234234
Bool[], Bool[], gensym(:W), gensym(:T)
235235
)
236236
end
237+
237238
# Number of loops stored in the loop set.
function num_loops(ls::LoopSet)
    return length(ls.loops)
end
238239
function oporder(ls::LoopSet)
239240
N = length(ls.loop_order.loopnames)

src/lowering.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -709,7 +709,7 @@ function reduce_expr!(q::Expr, ls::LoopSet, U::Int)
709709
end
710710
end
711711
function gc_preserve(ls::LoopSet, q::Expr)
712-
length(ls.includedarrays) == 0 && return q # is this even possible?
712+
length(ls.includedarrays) == 0 && return q
713713
gcp = Expr(:macrocall, Expr(:(.), :GC, QuoteNode(Symbol("@preserve"))), LineNumberNode(@__LINE__, @__FILE__))
714714
for array ls.includedarrays
715715
push!(gcp.args, array)

src/reconstruct_loopset.jl

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
# Build a placeholder `Loop` from the *type* of a loop range. Bounds that are
# encoded in the type are passed through and flagged exact; unknown bounds get
# a fresh gensym'd symbol and a default hint (0 for the start, start + 1024 for
# the stop).
# NOTE(review): the positional order (itersymbol, starthint, stophint,
# startsym, stopsym, startexact, stopexact) is assumed from the field names
# used elsewhere — confirm against the `Loop` struct definition.

# Fully dynamic range: neither bound is known.
function Loop(::Type{UnitRange{Int}})
    return Loop(gensym(:n), 0, 1024, gensym(:loopstart), gensym(:loopstop), false, false)::Loop
end
# Upper bound `U` is static; the start is dynamic.
function Loop(::Type{StaticUpperUnitRange{U}}) where {U}
    return Loop(gensym(:n), 0, U, gensym(:loopstart), Symbol(""), false, true)::Loop
end
# Lower bound `L` is static; the stop is dynamic.
function Loop(::Type{StaticLowerUnitRange{L}}) where {L}
    return Loop(gensym(:n), L, L + 1024, Symbol(""), gensym(:loopstop), true, false)::Loop
end
# Both bounds are static.
function Loop(::Type{StaticUnitRange{L,U}}) where {L,U}
    return Loop(gensym(:n), L, U, Symbol(""), Symbol(""), true, true)::Loop
end
13+
14+
"""
    add_loops!(ls::LoopSet, LB)

Add one `Loop` to `ls` for every range type in `LB`, in order. Returns `nothing`.
"""
function add_loops!(ls::LoopSet, LB)
    for l in LB
        # Construct the loop from the individual range type `l`, not from the
        # whole collection `LB`.
        add_loop!(ls, Loop(l)::Loop)
    end
    return nothing
end
21+
"""
    add_ops!(ls::LoopSet, ops::Vector{OperationStruct}, start::Int = 0, stopvptr = nothing)

Walk `ops` beginning at position `start + 1`. Return the position at which the
stop condition on `stopvptr` is met, or `0` if the end of `ops` is reached.
"""
function add_ops!(ls::LoopSet, ops::Vector{OperationStruct}, start::Int = 0, stopvptr = nothing)
    num_ops = length(ops)
    while start < num_ops
        start += 1
        opdescript = ops[start]
        # NOTE(review): `op` is not defined anywhere in this function, so this
        # line throws UndefVarError as soon as the loop body runs. Presumably
        # an Operation is meant to be built from `opdescript` and added to
        # `ls` first — that step is missing and needs to be written.
        stopvptr === vptr(op) && return start
    end
    return 0
end
31+
# Number of occupied bytes in a packed index word, counted from the lowest byte
# up to and including the highest non-zero one. The packed words are `UInt64`
# on every platform, so dispatch on `UInt64` rather than the word-size alias `UInt`.
numinds(u::UInt64) = 8 - (leading_zeros(u) >>> 3)
32+
"""
    add_mref!(ls, ar::ArrayRefStruct, arraysymbolinds, opsymbols, ::Type{PackedStridedPointer{T,N}})

Decode the packed index description `ar` into a vector of index symbols.

The lowest byte of `ar.index_types` / `ar.indices` describes the last index, so
the vector is filled from the back. Each byte of `ar.indices` is used as a
position in `ls.loopsymbols` (loop index), `opsymbols` (computed index) or
`arraysymbolinds` (symbolic index). The number of packed indices must equal
`N + 1`.
"""
function add_mref!(ls::LoopSet, ar::ArrayRefStruct, arraysymbolinds::Vector{Symbol}, opsymbols::Vector{Symbol}, ::Type{PackedStridedPointer{T, N}}) where {T, N}
    index_types = ar.index_types
    indices = ar.indices
    ni = numinds(index_types)
    Ni = N + 1
    @assert ni == Ni
    index_vec = Vector{Symbol}(undef, Ni)
    while index_types != zero(UInt64)
        ind = indices % UInt8
        # `==` between an unsigned word and an IndexType compares only the
        # lowest byte, i.e. the type of the index currently being decoded.
        symind = if index_types == LoopIndex
            ls.loopsymbols[ind]
        elseif index_types == ComputedIndex
            opsymbols[ind]
        else
            @assert index_types == SymbolicIndex
            arraysymbolinds[ind]
        end
        index_vec[ni] = symind
        index_types >>>= 8
        indices >>>= 8
        ni -= 1
    end
    # NOTE(review): `index_vec` is built but not yet used to add a reference to
    # `ls`; the remainder of this function is still to be written.
    return nothing
end
56+
57+
"""
    add_mrefs!(ls, arf::Vector{ArrayRefStruct}, as::Vector{Symbol}, os::Vector{Symbol}, vargs)

Pair every packed array reference in `arf` with the entry of `vargs` at the
same position. Returns `nothing`.
"""
function add_mrefs!(ls::LoopSet, arf::Vector{ArrayRefStruct}, as::Vector{Symbol}, os::Vector{Symbol}, vargs)
    for i in eachindex(arf)
        ref = arf[i]
        ptr_type = vargs[i]
        # NOTE(review): nothing is done with `ref` / `ptr_type` yet; presumably
        # `add_mref!(ls, ref, as, os, ptr_type)` is intended here — confirm.
    end
    return nothing
end
64+
"""
    process_metadata!(ls::LoopSet, AM, num_arrays::Int)

Unpack the metadata parameters `AM` into `ls` and return a vector of fresh
symbols, one per symbolic array index.

- `AM[1]`: number of symbolic array indices.
- `AM[2]`: outer reductions, appended to `ls.outer_reductions`.
- `AM[3]`: integer ids of symbol-valued preamble entries. For the `i`-th entry
  a fresh symbol is created, recorded in `ls.preamble_symsym`, and assigned in
  the preamble from `vargs[num_arrays + i]`.
- `AM[4]` … `AM[7]`: appended to `ls.preamble_symint`, `ls.preamble_symfloat`,
  `ls.preamble_zeros` and `ls.preamble_ones` respectively.
"""
function process_metadata!(ls::LoopSet, AM, num_arrays::Int)
    num_asi = (AM[1])::Int
    arraysymbolinds = [gensym(:asi) for _ in 1:num_asi]
    append!(ls.outer_reductions, AM[2].parameters)
    for (i, si) in enumerate(AM[3].parameters)
        sii = si::Int
        s = gensym(:symlicm)
        push!(ls.preamble_symsym, (sii, s))
        # The generated code runs inside `_avx!`, whose varargs tuple is named
        # `vargs`; the symbol-valued arguments follow the array pointers in it.
        pushpreamble!(ls, Expr(:(=), s, Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__, @__FILE__), Expr(:ref, :vargs, num_arrays + i))))
    end
    append!(ls.preamble_symint, AM[4].parameters)
    append!(ls.preamble_symfloat, AM[5].parameters)
    append!(ls.preamble_zeros, AM[6].parameters)
    append!(ls.preamble_ones, AM[7].parameters)
    return arraysymbolinds
end
80+
"""
    avx_body(ops, arf, AM, LB, vargs)

Start rebuilding a `LoopSet` from the packed descriptions: add the loops
described by `LB`, unpack the metadata `AM`, and create one fresh symbol per
operation in `ops`.
"""
function avx_body(ops, arf, AM, LB, vargs)
    ls = LoopSet()
    add_loops!(ls, LB)
    arraysymbolinds = process_metadata!(ls, AM, length(arf))
    # NOTE(review): the function currently ends here and returns `opsymbols`.
    # It is used as the body of the generated `_avx!`, which presumably needs
    # an expression built from `ls`; adding the array references and operations
    # and lowering the loop set are still missing.
    opsymbols = [gensym(:op) for _ in eachindex(ops)]
end
87+
88+
# Generated entry point: the operation, array-reference and metadata
# descriptions arrive as type parameters, the loop bounds as the type of `lb`,
# and everything else as the types of `vargs`. The body is produced by
# `avx_body` from those types.
# The `where` clause is required: without it OPS/ARF/AM/LB are undefined names.
@generated function _avx!(::Type{OPS}, ::Type{ARF}, ::Type{AM}, lb::LB, vargs...) where {OPS, ARF, AM, LB}
    avx_body(
        OperationStruct[OPS.parameters...],
        ArrayRefStruct[ARF.parameters...],
        AM.parameters, LB.parameters, vargs
    )
end
95+

0 commit comments

Comments
 (0)