Skip to content

Commit 3b18781

Browse files
committed
Further progress towards 0.4.
1 parent dda4b61 commit 3b18781

File tree

4 files changed

+139
-34
lines changed

4 files changed

+139
-34
lines changed

src/condense_loopset.jl

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ struct OperationStruct
4545
loopdeps::UInt64
4646
reduceddeps::UInt64
4747
parents::UInt64
48+
node_type::OperationType
4849
array::UInt8
4950
end
5051
function findmatchingarray(ls::LoopSet, array::Symbol)
@@ -55,12 +56,14 @@ function findmatchingarray(ls::LoopSet, array::Symbol)
5556
end
5657
0x00
5758
end
58-
filled_4byte_chunks(u::UInt64) = leading_zeros(u) >>> 2
59+
"""
    filled_4byte_chunks(u::UInt64)

Return the number of 4-bit groups of `u` that are in use, counted from the
least significant group up to and including the highest group that contains a
set bit. Returns `0` for `u == 0`. Despite the name, each chunk is 4 bits wide.
"""
filled_4byte_chunks(u::UInt64) = cld(64 - leading_zeros(u), 4)
60+
"""
    filled_8byte_chunks(u::UInt64)

Return the number of 8-bit groups of `u` that are in use, counted from the
least significant group up to and including the highest group that contains a
set bit. Returns `0` for `u == 0`. Despite the name, each chunk is 8 bits wide.
"""
filled_8byte_chunks(u::UInt64) = cld(64 - leading_zeros(u), 8)
61+
5962
"""
    num_loop_deps(os::OperationStruct)

Count the occupied 4-bit slots of `os.loopdeps`.
"""
function num_loop_deps(os::OperationStruct)
    return filled_4byte_chunks(os.loopdeps)
end
6063
"""
    num_reduced_deps(os::OperationStruct)

Count the occupied 4-bit slots of `os.reduceddeps`.
"""
# The struct field is spelled `reduceddeps` (no underscore); reading
# `os.reduced_deps` would throw because no such field exists.
num_reduced_deps(os::OperationStruct) = filled_4byte_chunks(os.reduceddeps)
6164
"""
    num_parents(os::OperationStruct)

Count the occupied 4-bit slots of `os.parents`.
"""
function num_parents(os::OperationStruct)
    # NOTE(review): `add_parents_to_op!` consumes the parents field 8 bits at a
    # time; confirm that a 4-bit count is the intended width here.
    return filled_4byte_chunks(os.parents)
end
6265

63-
function loopdeps_uint(ls::LoopSet, loopsyms::Vector{Symbol})
66+
function shifted_loopset(ls::LoopSet, loopsyms::Vector{Symbol})
6467
ld = zero(UInt64) # leading_zeros(ld) >> 2 yields the number of loopdeps
6568
for d loopsyms
6669
ld <<= 4
@@ -85,7 +88,7 @@ function OperationStruct(ls::LoopSet, op::Operation)
8588
p = parents_uint(ls, op)
8689
array = accesses_memory(op) ? findmatchingarray(ls, vptr(op.ref)) : 0x00
8790
OperationStruct(
88-
instr, ld, rd, p, array
91+
instr, ld, rd, p, op.node_type, array
8992
)
9093
end
9194
## turn a LoopSet into a type object which can be used to reconstruct the LoopSet.

src/determinestrategy.jl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ function cost(op::Operation, unrolled::Symbol, Wshift::Int, size_T::Int = op.ele
1515
# Wshift == dependson(op, unrolled) ? Wshift : 0
1616
# c = first(cost(instruction(op), Wshift, size_T))::Int
1717
instr = instruction(op)
18+
if length(parents(op)) == 1
19+
if instr == Instruction(:-) || instr === Instruction(:vsub) || instr == Instruction(:+) || instr == Instruction(:vadd)
20+
return 0.0, 0, 1
21+
end
22+
end
1823
opisunrolled = dependson(op, unrolled)
1924
srt, sl, srp = opisunrolled ? vector_cost(instr, Wshift, size_T) : scalar_cost(instr)
2025
if accesses_memory(op)

src/graphs.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,8 @@ function getloopid(ls::LoopSet, s::Symbol)::Int
247247
end
248248
end
249249
"""
    getloop(ls::LoopSet, s::Symbol)

Return the entry of `ls.loops` at the position `getloopid(ls, s)`.
"""
function getloop(ls::LoopSet, s::Symbol)
    id = getloopid(ls, s)
    return ls.loops[id]
end
250+
"""
    getloop(ls::LoopSet, i::Integer)

Return the `i`-th entry of `ls.loops`.
"""
function getloop(ls::LoopSet, i::Integer)
    return ls.loops[i]
end
251+
"""
    getloopsym(ls::LoopSet, i::Integer)

Return the `i`-th entry of `ls.loopsymbols`.
"""
function getloopsym(ls::LoopSet, i::Integer)
    return ls.loopsymbols[i]
end
250252
"""
    length(ls::LoopSet, s::Symbol)

Return `length` of the loop that `getloop(ls, s)` looks up.
"""
function Base.length(ls::LoopSet, s::Symbol)
    loop = getloop(ls, s)
    return length(loop)
end
251253

252254
"""
    isstaticloop(ls::LoopSet, s::Symbol)

Forward to `isstaticloop` on the loop that `getloop(ls, s)` looks up.
"""
function isstaticloop(ls::LoopSet, s::Symbol)
    loop = getloop(ls, s)
    return isstaticloop(loop)
end

src/reconstruct_loopset.jl

Lines changed: 126 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,42 @@
1-
function Loop(::Type{UnitRange{Int}})
2-
Loop(gensym(:n), 0, 1024, gensym(:loopstart), gensym(:loopstop), false, false)::Loop
1+
"""
    Loop(ls::LoopSet, l::Int, ::Type{UnitRange{Int}})

Create the `Loop` for entry `l` of `lb` when the range type is `UnitRange{Int}`.

Two fresh symbols are generated and assigned in the preamble of `ls`:
`start = @inbounds lb[l].start` and `stop = @inbounds lb[l].stop`. The `Loop`
is then built with those symbols, the numeric bounds `0` and `1024`, and both
trailing flags `false`.
"""
function Loop(ls::LoopSet, l::Int, ::Type{UnitRange{Int}})
    start = gensym(:loopstart)
    stop = gensym(:loopstop)
    # Emit `name = @inbounds lb[l].field` for each bound, start first.
    for (name, field) in ((start, :start), (stop, :stop))
        bound = Expr(:(.), Expr(:ref, :lb, l), QuoteNode(field))
        guarded = Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__, @__FILE__), bound)
        pushpreamble!(ls, Expr(:(=), name, guarded))
    end
    return Loop(gensym(:n), 0, 1024, start, stop, false, false)::Loop
end
4-
function Loop(::Type{StaticUpperUnitRange{U}}) where {U}
5-
Loop(gensym(:n), 0, U, gensym(:loopstart), Symbol(""), false, true)::Loop
7+
"""
    Loop(ls::LoopSet, l::Int, ::Type{StaticUpperUnitRange{U}}) where {U}

Create the `Loop` for entry `l` of `lb` when the upper bound `U` is carried in
the type.

A fresh symbol is assigned `@inbounds lb[l].L` in the preamble of `ls` and used
as the start symbol. The stop symbol is the empty symbol. The numeric bounds
passed on are `U - 1024` and `U`, with flags `false, true`.
"""
function Loop(ls::LoopSet, l::Int, ::Type{StaticUpperUnitRange{U}}) where {U}
    start = gensym(:loopstart)
    lower = Expr(:(.), Expr(:ref, :lb, l), QuoteNode(:L))
    guarded = Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__, @__FILE__), lower)
    pushpreamble!(ls, Expr(:(=), start, guarded))
    return Loop(gensym(:n), U - 1024, U, start, Symbol(""), false, true)::Loop
end
7-
function Loop(::Type{StaticLowerUnitRange{L}}) where {L}
8-
Loop(gensym(:n), L, L + 1024, Symbol(""), gensym(:loopstop), true, false)::Loop
12+
"""
    Loop(ls::LoopSet, l::Int, ::Type{StaticLowerUnitRange{L}}) where {L}

Create the `Loop` for entry `l` of `lb` when the lower bound `L` is carried in
the type.

A fresh symbol is assigned `@inbounds lb[l].U` in the preamble of `ls` and used
as the stop symbol. The start symbol is the empty symbol. The numeric bounds
passed on are `L` and `L + 1024`, with flags `true, false`.
"""
function Loop(ls::LoopSet, l::Int, ::Type{StaticLowerUnitRange{L}}) where {L}
    stop = gensym(:loopstop)
    upper = Expr(:(.), Expr(:ref, :lb, l), QuoteNode(:U))
    guarded = Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__, @__FILE__), upper)
    pushpreamble!(ls, Expr(:(=), stop, guarded))
    return Loop(gensym(:n), L, L + 1024, Symbol(""), stop, true, false)::Loop
end
10-
function Loop(::Type{StaticUnitRange{L,U}}) where {L,U}
17+
"""
    Loop(ls, l, ::Type{StaticUnitRange{L,U}}) where {L,U}

Create a `Loop` whose bounds `L` and `U` are both carried in the type. Nothing
is pushed to the preamble; `ls` and `l` are unused. Both bound symbols are the
empty symbol and both trailing flags are `true`.
"""
function Loop(ls, l, ::Type{StaticUnitRange{L,U}}) where {L,U}
    itersym = gensym(:n)
    nosym = Symbol("")
    return Loop(itersym, L, U, nosym, nosym, true, true)::Loop
end
1320

1421
"""
    add_loops!(ls::LoopSet, LB)

Add one `Loop` to `ls` for every entry of `LB`.

`LB` is an indexable collection of range types. For each index `l`, the
matching `Loop(ls, l, LB[l])` method is called and its result is passed to
`add_loop!`. Returns `nothing`.
"""
function add_loops!(ls::LoopSet, LB)
    # The `Loop` methods take the position as their second argument and the
    # range *type* as their third, so iterate over indices and pass `LB[l]`.
    for l in eachindex(LB)
        add_loop!(ls, Loop(ls, l, LB[l])::Loop)
    end
    return nothing
end
31-
numinds(u::UInt) = 8 - (leading_zeros(u) >>> 3)
32-
function add_mref!(ls::LoopSet, ar::ArrayRef, arraysymbolinds::Vector{Symbol}, opsymbols::Vector{Symbol}, ::Type{PackedStridedPointer{T, N}}) where {T, N}
27+
function ArrayReferenceMeta(
28+
ls::LoopSet, ar::ArrayRefStruct, arraysymbolinds::Vector{Symbol}, opsymbols::Vector{Symbol},
29+
array::Symbol, vp::Symbol
30+
)
3331
index_types = ar.index_types
3432
indices = ar.indices
35-
ni = numinds(index_types)
36-
Ni = N + 1
37-
@assert ni == Ni
38-
index_vec = Vector{Symbol}(undef, Ni)
33+
ni = filled_8byte_chunks(index_types)
34+
index_vec = Vector{Symbol}(undef, ni)
35+
loopedindex = fill(false, ni)
3936
while index_types != zero(UInt64)
4037
ind = indices % UInt8
4138
symind = if index_types == LoopIndex
39+
loopedindex[ni] = true
4240
ls.loopsymbols[ind]
4341
elseif index_types == ComputedIndex
4442
opsymbols[ind]
@@ -51,15 +49,61 @@ function add_mref!(ls::LoopSet, ar::ArrayRef, arraysymbolinds::Vector{Symbol}, o
5149
indices >>>= 8
5250
ni -= 1
5351
end
54-
52+
ArrayReferenceMeta(
53+
ArrayReference(vp, index_vec),
54+
loopedindex, array
55+
)
5556
end
5657

57-
function add_mrefs!(ls::LoopSet, arf::Vector{ArrayRefStruct}, as::Vector{Symbol}, os::Vector{Symbol}, vargs)
58+
"""
    add_mref!(ls, ars, arraysymbolinds, opsymbols, i, ::Type{PackedStridedPointer{T,N}})

Build the `ArrayReferenceMeta` for `ars` and bind its pointer symbol in the
preamble of `ls` via `vptr(ar) = @inbounds vargs[i]`. Returns the new
`ArrayReferenceMeta`.
"""
function add_mref!(ls::LoopSet, ars::ArrayRefStruct, arraysymbolinds::Vector{Symbol}, opsymbols::Vector{Symbol}, i::Int, ::Type{PackedStridedPointer{T, N}}) where {T, N}
    # Pass the `ars` argument; `ar` is the local being assigned and is not yet defined.
    ar = ArrayReferenceMeta(ls, ars, arraysymbolinds, opsymbols, Symbol(""), gensym())
    pushpreamble!(ls, Expr(:(=), vptr(ar), Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__, @__FILE__), Expr(:ref, :vargs, i))))
    return ar
end
63+
"""
    add_mref!(ls, ars, arraysymbolinds, opsymbols, i, ::Type{RowMajorStridedPointer{T,N}})

Build the `ArrayReferenceMeta` for `ars`, reverse its `loopedindex` flags and
its indices in place, and bind its pointer symbol in the preamble of `ls` to
`lv(:Transpose)` applied to `@inbounds vargs[i]`. Returns the new
`ArrayReferenceMeta`.
"""
function add_mref!(ls::LoopSet, ars::ArrayRefStruct, arraysymbolinds::Vector{Symbol}, opsymbols::Vector{Symbol}, i::Int, ::Type{RowMajorStridedPointer{T, N}}) where {T, N}
    # Pass the `ars` argument; `ar` is the local being assigned and is not yet defined.
    ar = ArrayReferenceMeta(ls, ars, arraysymbolinds, opsymbols, Symbol(""), gensym())
    # Reverse the listed indices here, and transpose the pointer to make it column major.
    reverse!(ar.loopedindex)
    reverse!(getindices(ar))
    pushpreamble!(ls, Expr(:(=), vptr(ar), Expr(:call, lv(:Transpose), Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__, @__FILE__), Expr(:ref, :vargs, i)))))
    return ar
end
69+
"""
    add_mref!(ls, ars, arraysymbolinds, opsymbols, i, ::Type{StaticStridedPointer{T,X}}) where {X<:Tuple{1,Vararg}}

Build the `ArrayReferenceMeta` for `ars` when the stride tuple `X` begins with
`1`, and bind its pointer symbol in the preamble of `ls` via
`vptr(ar) = @inbounds vargs[i]`. Returns the new `ArrayReferenceMeta`.
"""
function add_mref!(ls::LoopSet, ars::ArrayRefStruct, arraysymbolinds::Vector{Symbol}, opsymbols::Vector{Symbol}, i::Int, ::Type{StaticStridedPointer{T, X}}) where {T, X <: Tuple{1,Vararg}}
    # Pass the `ars` argument; `ar` is the local being assigned and is not yet defined.
    ar = ArrayReferenceMeta(ls, ars, arraysymbolinds, opsymbols, Symbol(""), gensym())
    pushpreamble!(ls, Expr(:(=), vptr(ar), Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__, @__FILE__), Expr(:ref, :vargs, i))))
    return ar
end
74+
"""
    add_mref!(ls, ars, arraysymbolinds, opsymbols, i, ::Type{StaticStridedPointer{T,X}}) where {X<:Tuple}

Build the `ArrayReferenceMeta` for `ars` for a general stride tuple `X`, bind
its pointer symbol in the preamble of `ls` via `vptr(ar) = @inbounds vargs[i]`,
and prepend the marker symbol `##DISCONTIGUOUSSUBARRAY##` to its indices.
Returns the new `ArrayReferenceMeta`.
"""
function add_mref!(ls::LoopSet, ars::ArrayRefStruct, arraysymbolinds::Vector{Symbol}, opsymbols::Vector{Symbol}, i::Int, ::Type{StaticStridedPointer{T, X}}) where {T, X <: Tuple}
    # Pass the `ars` argument; `ar` is the local being assigned and is not yet defined.
    ar = ArrayReferenceMeta(ls, ars, arraysymbolinds, opsymbols, Symbol(""), gensym())
    pushpreamble!(ls, Expr(:(=), vptr(ar), Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__, @__FILE__), Expr(:ref, :vargs, i))))
    pushfirst!(getindices(ar), Symbol("##DISCONTIGUOUSSUBARRAY##"))
    return ar
end
80+
"""
    add_mref!(ls, ars, arraysymbolinds, opsymbols, i, ::Type{SparseStridedPointer{T,N}})

Build the `ArrayReferenceMeta` for `ars`, bind its pointer symbol in the
preamble of `ls` via `vptr(ar) = @inbounds vargs[i]`, and prepend the marker
symbol `##DISCONTIGUOUSSUBARRAY##` to its indices. Returns the new
`ArrayReferenceMeta`.
"""
function add_mref!(ls::LoopSet, ars::ArrayRefStruct, arraysymbolinds::Vector{Symbol}, opsymbols::Vector{Symbol}, i::Int, ::Type{SparseStridedPointer{T, N}}) where {T, N}
    # Pass the `ars` argument; `ar` is the local being assigned and is not yet defined.
    ar = ArrayReferenceMeta(ls, ars, arraysymbolinds, opsymbols, Symbol(""), gensym())
    pushpreamble!(ls, Expr(:(=), vptr(ar), Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__, @__FILE__), Expr(:ref, :vargs, i))))
    pushfirst!(getindices(ar), Symbol("##DISCONTIGUOUSSUBARRAY##"))
    return ar
end
86+
"""
    add_mref!(ls, ars, arraysymbolinds, opsymbols, i, ::Type{StaticStridedStruct{T,X}}) where {X<:Tuple{1,Vararg}}

Build the `ArrayReferenceMeta` for `ars` when the stride tuple `X` begins with
`1`, and bind its pointer symbol in the preamble of `ls` via
`vptr(ar) = @inbounds vargs[i]`. Returns the new `ArrayReferenceMeta`.
"""
function add_mref!(ls::LoopSet, ars::ArrayRefStruct, arraysymbolinds::Vector{Symbol}, opsymbols::Vector{Symbol}, i::Int, ::Type{StaticStridedStruct{T, X}}) where {T, X <: Tuple{1,Vararg}}
    # Pass the `ars` argument; `ar` is the local being assigned and is not yet defined.
    ar = ArrayReferenceMeta(ls, ars, arraysymbolinds, opsymbols, Symbol(""), gensym())
    pushpreamble!(ls, Expr(:(=), vptr(ar), Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__, @__FILE__), Expr(:ref, :vargs, i))))
    return ar
end
91+
"""
    add_mref!(ls, ars, arraysymbolinds, opsymbols, i, ::Type{StaticStridedStruct{T,X}}) where {X<:Tuple}

Build the `ArrayReferenceMeta` for `ars` for a general stride tuple `X`, bind
its pointer symbol in the preamble of `ls` via `vptr(ar) = @inbounds vargs[i]`,
and prepend the marker symbol `##DISCONTIGUOUSSUBARRAY##` to its indices.
Returns the new `ArrayReferenceMeta`.
"""
function add_mref!(ls::LoopSet, ars::ArrayRefStruct, arraysymbolinds::Vector{Symbol}, opsymbols::Vector{Symbol}, i::Int, ::Type{StaticStridedStruct{T, X}}) where {T, X <: Tuple}
    # Pass the `ars` argument; `ar` is the local being assigned and is not yet defined.
    ar = ArrayReferenceMeta(ls, ars, arraysymbolinds, opsymbols, Symbol(""), gensym())
    pushpreamble!(ls, Expr(:(=), vptr(ar), Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__, @__FILE__), Expr(:ref, :vargs, i))))
    pushfirst!(getindices(ar), Symbol("##DISCONTIGUOUSSUBARRAY##"))
    return ar
end
97+
98+
99+
100+
"""
    create_mrefs!(ls, arf, as, os, vargs)

Call `add_mref!` once per entry of `arf`, dispatching on the matching entry of
`vargs`, and collect the results.

Returns a `Vector{ArrayReferenceMeta}` with one element per entry of `arf`, in
the same order.
"""
function create_mrefs!(ls::LoopSet, arf::Vector{ArrayRefStruct}, as::Vector{Symbol}, os::Vector{Symbol}, vargs)
    nrefs = length(arf)
    mrefs = Vector{ArrayReferenceMeta}(undef, nrefs)
    for (i, refstruct) in enumerate(arf)
        mrefs[i] = add_mref!(ls, refstruct, as, os, i, vargs[i])::ArrayReferenceMeta
    end
    return mrefs
end
64108
function process_metadata!(ls::LoopSet, AM, num_arrays::Int)
65109
num_asi = (AM[1])::Int
@@ -77,14 +121,65 @@ function process_metadata!(ls::LoopSet, AM, num_arrays::Int)
77121
append!(ls.preamble_ones, AM[7].parameters)
78122
arraysymbolinds
79123
end
124+
"""
    parents_symvec(ls::LoopSet, u::Unsigned)

Decode `u` into a vector of loop symbols.

`u` is read 4 bits at a time starting from the least significant end. Each
4-bit value is looked up with `getloopsym(ls, …)`. The lowest 4 bits fill the
last slot of the result, so the returned vector lists the most significant
entry first. Its length is `filled_4byte_chunks(u)`.
"""
function parents_symvec(ls::LoopSet, u::Unsigned)
    nslots = filled_4byte_chunks(u)
    syms = Vector{Symbol}(undef, nslots)
    # Fill from the back: the lowest 4 bits belong to the last slot.
    for slot in nslots:-1:1
        syms[slot] = getloopsym(ls, (u % UInt8) & 0x0f)
        u >>= 4
    end
    return syms
end
134+
"""
    loopdependencies(ls::LoopSet, os::OperationStruct)

Decode `os.loopdeps` into a vector of loop symbols via `parents_symvec`.
"""
# Read the field from the `os` argument; there is no `op` in this scope.
loopdependencies(ls::LoopSet, os::OperationStruct) = parents_symvec(ls, os.loopdeps)
135+
"""
    reduceddependencies(ls::LoopSet, os::OperationStruct)

Decode `os.reduceddeps` into a vector of loop symbols via `parents_symvec`.
"""
# Read the field from the `os` argument; there is no `op` in this scope.
reduceddependencies(ls::LoopSet, os::OperationStruct) = parents_symvec(ls, os.reduceddeps)
136+
137+
138+
139+
"""
    add_op!(ls, os, mrefs, opsymbol, elementbytes)

Reconstruct an `Operation` from its condensed description `os`, append it to
`ls.operations`, and return it.

The identifier given to the operation is the number of operations already in
`ls`. Loop and reduced dependencies are decoded from `os`. The parents vector
starts empty and is filled later by `add_parents_to_ops!`. The array reference
is `mrefs[os.array]` when `os.array` is non-zero and `NOTAREFERENCE` otherwise.
"""
function add_op!(ls::LoopSet, os::OperationStruct, mrefs::Vector{ArrayReferenceMeta}, opsymbol::Symbol, elementbytes::Int)
    # `op` does not exist until the constructor returns, so the reference must
    # be chosen from `os` alone. The `OperationStruct(ls, op)` constructor
    # stores `0x00` in `array` for operations that do not access memory.
    arrayid = os.array
    ref = iszero(arrayid) ? NOTAREFERENCE : mrefs[arrayid]
    op = Operation(
        length(operations(ls)), opsymbol, elementbytes, os.instruction,
        os.node_type, loopdependencies(ls, os), reduceddependencies(ls, os),
        Operation[], ref
    )
    push!(ls.operations, op)
    return op
end
149+
"""
    add_parents_to_op!(ls, parents, up)

Decode `up` 8 bits at a time and insert the referenced operations of `ls` at
the front of `parents`.

The lowest 8 bits are handled first. Because each entry is inserted with
`pushfirst!`, the operation encoded in the most significant occupied byte ends
up first. Returns `nothing`.
"""
function add_parents_to_op!(ls::LoopSet, parents::Vector{Operation}, up::Unsigned)
    allops = operations(ls)
    remaining = up
    while !iszero(remaining)
        parentid = remaining % UInt8
        pushfirst!(parents, allops[parentid])
        remaining >>>= 8
    end
    return nothing
end
156+
"""
    add_parents_to_ops!(ls, ops)

For every index `i` of `ops`, fill the parents vector of `getop(ls, i)` from
the encoded `ops[i].parents` field using `add_parents_to_op!`. Returns
`nothing`.
"""
function add_parents_to_ops!(ls::LoopSet, ops::Vector{OperationStruct})
    for (i, opstruct) in enumerate(ops)
        add_parents_to_op!(ls, parents(getop(ls, i)), opstruct.parents)
    end
    return nothing
end
161+
"""
    add_ops!(ls, ops, mrefs, opsymbols, elementbytes)

Rebuild all operations of `ls` in two passes. The first pass calls `add_op!`
for each entry of `ops`, paired with the symbol at the same index of
`opsymbols`. The second pass calls `add_parents_to_ops!` to link parents once
every operation exists. Returns whatever `add_parents_to_ops!` returns.
"""
function add_ops!(ls::LoopSet, ops::Vector{OperationStruct}, mrefs::Vector{ArrayReferenceMeta}, opsymbols::Vector{Symbol}, elementbytes::Int)
    foreach(eachindex(ops)) do i
        add_op!(ls, ops[i], mrefs, opsymbols[i], elementbytes)
    end
    return add_parents_to_ops!(ls, ops)
end
167+
168+
# elbytes(::VectorizationBase.AbstractPointer{T}) where {T} = sizeof(T)::Int
169+
"""
    typeeltype(::Type{<:VectorizationBase.AbstractPointer{T}}) where {T}

Return the element type parameter `T` of a pointer *type*.
"""
typeeltype(::Type{<:VectorizationBase.AbstractPointer{T}}) where {T} = T
170+
80171
"""
    avx_body(ops, arf, AM, LB, vargs)

Reconstruct a `LoopSet` from its condensed description.

The steps are:
1. compute `elementbytes` as the size of the promoted element type of the
   first `length(arf)` entries of `vargs`;
2. add the loops described by `LB`;
3. process the metadata `AM`, which yields the array symbol indices;
4. generate one fresh symbol per operation;
5. create the array references with `create_mrefs!`;
6. add the operations and link their parents with `add_ops!`.

Returns the value of the final `add_ops!` call.
"""
function avx_body(ops, arf, AM, LB, vargs)
    ls = LoopSet()
    # Only the leading `length(arf)` entries of `vargs` are considered here.
    ptrtypes = @view(vargs[Base.OneTo(length(arf))])
    elementbytes = sizeof(mapreduce(typeeltype, promote_type, ptrtypes))::Int
    add_loops!(ls, LB)
    arraysymbolinds = process_metadata!(ls, AM, length(arf))
    opsymbols = [gensym(:op) for _ in eachindex(ops)]
    # The builder defined in this file is `create_mrefs!` (with the bang).
    mrefs = create_mrefs!(ls, arf, arraysymbolinds, opsymbols, vargs)
    add_ops!(ls, ops, mrefs, opsymbols, elementbytes)
end
87181

182+
88183
@generated function _avx!(::Type{OPS}, ::Type{ARF}, ::Type{AM}, lb::LB, vargs...)
89184
avx_body(
90185
OperationStruct[OPS.parameters...],

0 commit comments

Comments
 (0)