Skip to content

Commit 75b9387

Browse files
committed
16 loops
1 parent 9ce0c2c commit 75b9387

File tree

4 files changed

+85
-92
lines changed

4 files changed

+85
-92
lines changed

src/codegen/operation_evaluation_order.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ function load_short_static_reduction_first!(ls::LoopSet, u₁loop::Symbol, u₂l
142142
end
143143

144144
function fillorder!(ls::LoopSet, order::Vector{Symbol}, u₁loop::Symbol, u₂loop::Symbol, u₂max::Int, vectorized::Symbol)
145-
load_short_static_reduction_first!(ls, u₁loop, u₂loop, vectorized)
145+
load_short_static_reduction_first!(ls, u₁loop, u₂loop, vectorized)
146146
lo = ls.loop_order
147147
resize!(lo, length(ls.loopsymbols))
148148
ro = lo.loopnames # reverse order; will have same order as lo

src/codegen/split_loops.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,6 @@ function lower_and_split_loops(ls::LoopSet, inline::Int)
102102
ls_2 = split_loopset(ls, remaining_ops)
103103
order_2, unrolled_2, tiled_2, vectorized_2, U_2, T_2, cost_2, shouldinline_2 = choose_order_cost(ls_2)
104104
# U_1 = T_1 = U_2 = T_2 = 2
105-
#@show cost_1 + cost_2 ≤ cost_fused, cost_1, cost_2, cost_fused
106105
if cost_1 + cost_2 ≤ cost_fused
107106
ls_2_lowered = if length(remaining_ops) > 1
108107
inline = iszero(inline) ? (shouldinline_1 % Int) : inline

src/condense_loopset.jl

Lines changed: 76 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -75,57 +75,57 @@ It supports array-references with up to 8 indexes, where the data for each conse
7575
of `index_types` (storing the enum `IndexType`), `indices` (the `id` for each index symbol), and `offsets` (currently unused).
7676
"""
7777
struct ArrayRefStruct{array,ptr}
78-
index_types::UInt64
79-
indices::UInt64
80-
offsets::UInt64
81-
strides::UInt64
78+
index_types::UInt128
79+
indices::UInt128
80+
offsets::UInt128
81+
strides::UInt128
8282
end
8383
array_and_ptr(@nospecialize(ar::ArrayRefStruct{a,p})) where {a,p} = (a::Symbol,p::Symbol)
8484
# array(@nospecialize(ar::ArrayRefStruct{a,p})) where {a,p} = a::Symbol
8585
# ptr(@nospecialize(ar::ArrayRefStruct{a,p})) where {a,p} = p::Symbol
8686

8787
function findindoradd!(v::Vector{T}, s::T) where {T}
88-
ind = findfirst(==(s), v)
89-
ind === nothing || return ind
90-
push!(v, s)
91-
length(v)
88+
ind = findfirst(==(s), v)
89+
ind === nothing || return ind
90+
push!(v, s)
91+
length(v)
9292
end
9393
function ArrayRefStruct(ls::LoopSet, mref::ArrayReferenceMeta, arraysymbolinds::Vector{Symbol}, ids::Vector{Int})
94-
index_types = zero(UInt64)
95-
indices = zero(UInt64)
96-
offsets = zero(UInt64)
97-
strides = zero(UInt64)
98-
@unpack loopedindex, ref = mref
99-
indv = ref.indices
100-
offv = ref.offsets
101-
strv = ref.strides
102-
# we can discard that the array was considered discontiguous, as it should be recovered from type information
103-
start = 1 + (first(indv) === DISCONTIGUOUS)
104-
for (n,ind) ∈ enumerate(@view(indv[start:end]))
105-
index_types <<= 8
106-
indices <<= 8
107-
offsets <<= 8
108-
offsets |= (offv[n] % UInt8)
109-
strides <<= 8
110-
strides |= (strv[n] % UInt8)
111-
if loopedindex[n]
112-
index_types |= LoopIndex
113-
if strv[n] ≠ 0
114-
indices |= getloopid(ls, ind)
115-
end
116-
else
117-
parent = get(ls.opdict, ind, nothing)
118-
@assert !(parent === nothing) "Index $ind not found in array."
119-
# if parent === nothing
120-
# index_types |= SymbolicIndex
121-
# indices |= findindoradd!(arraysymbolinds, ind)
122-
# else
123-
index_types |= ComputedIndex
124-
indices |= ids[identifier(parent)]
125-
# end
126-
end
94+
index_types = zero(UInt128)
95+
indices = zero(UInt128)
96+
offsets = zero(UInt128)
97+
strides = zero(UInt128)
98+
@unpack loopedindex, ref = mref
99+
indv = ref.indices
100+
offv = ref.offsets
101+
strv = ref.strides
102+
# we can discard that the array was considered discontiguous, as it should be recovered from type information
103+
start = 1 + (first(indv) === DISCONTIGUOUS)
104+
for (n,ind) ∈ enumerate(@view(indv[start:end]))
105+
index_types <<= 8
106+
indices <<= 8
107+
offsets <<= 8
108+
offsets |= (offv[n] % UInt8)
109+
strides <<= 8
110+
strides |= (strv[n] % UInt8)
111+
if loopedindex[n]
112+
index_types |= LoopIndex
113+
if strv[n] ≠ 0
114+
indices |= getloopid(ls, ind)
115+
end
116+
else
117+
parent = get(ls.opdict, ind, nothing)
118+
@assert !(parent === nothing) "Index $ind not found in array."
119+
# if parent === nothing
120+
# index_types |= SymbolicIndex
121+
# indices |= findindoradd!(arraysymbolinds, ind)
122+
# else
123+
index_types |= ComputedIndex
124+
indices |= ids[identifier(parent)]
125+
# end
127126
end
128-
ArrayRefStruct{mref.ref.array,mref.ptr}( index_types, indices, offsets, strides )
127+
end
128+
ArrayRefStruct{mref.ref.array,mref.ptr}( index_types, indices, offsets, strides )
129129
end
130130

131131
"""
@@ -135,62 +135,56 @@ A condensed representation of an [`Operation`](@ref).
135135
"""
136136
struct OperationStruct <: AbstractLoopOperation
137137
# instruction::Instruction
138-
loopdeps::UInt64
139-
reduceddeps::UInt64
140-
childdeps::UInt64
141-
parents::UInt64
142-
node_type::OperationType
143-
array::UInt8
144-
symid::UInt8
138+
loopdeps::UInt128
139+
reduceddeps::UInt128
140+
childdeps::UInt128
141+
parents::UInt128
142+
node_type::OperationType
143+
array::UInt8
144+
symid::UInt8
145145
end
146146
optype(os) = os.node_type
147147

148148
function findmatchingarray(ls::LoopSet, mref::ArrayReferenceMeta)
149-
id = 0x01
150-
for r ∈ ls.refs_aliasing_syms
151-
r == mref && return id
152-
id += 0x01
153-
end
154-
0x00
149+
id = 0x01
150+
for r ∈ ls.refs_aliasing_syms
151+
r == mref && return id
152+
id += 0x01
153+
end
154+
0x00
155155
end
156-
# filled_4byte_chunks(u::UInt64) = 16 - (leading_zeros(u) >>> 2)
157-
filled_8byte_chunks(u::UInt64) = 8 - (leading_zeros(u) >>> 3)
158-
159-
# num_loop_deps(os::OperationStruct) = filled_4byte_chunks(os.loopdeps)
160-
# num_reduced_deps(os::OperationStruct) = filled_4byte_chunks(os.reduceddeps)
161-
# num_child_deps(os::OperationStruct) = filled_4byte_chunks(os.childdeps)
162-
# num_parents(os::OperationStruct) = filled_4byte_chunks(os.parents)
156+
filled_8byte_chunks(u::T) where {T<:Unsigned} = sizeof(T) - (leading_zeros(u) >>> 3)
163157

164158
function shifted_loopset(ls::LoopSet, loopsyms::Vector{Symbol})
165-
ld = zero(UInt64) # leading_zeros(ld) >> 2 yields the number of loopdeps
166-
for d ∈ loopsyms
167-
ld <<= 4
168-
ld |= getloopid(ls, d)::Int
169-
end
170-
ld
159+
ld = zero(UInt128) # leading_zeros(ld) >> 2 yields the number of loopdeps
160+
for d ∈ loopsyms
161+
ld <<= 4
162+
ld |= getloopid(ls, d)::Int
163+
end
164+
ld
171165
end
172166
loopdeps_uint(ls::LoopSet, op::Operation) = shifted_loopset(ls, loopdependencies(op))
173167
reduceddeps_uint(ls::LoopSet, op::Operation) = shifted_loopset(ls, reduceddependencies(op))
174168
childdeps_uint(ls::LoopSet, op::Operation) = shifted_loopset(ls, reducedchildren(op))
175169
function parents_uint(ls::LoopSet, op::Operation)
176-
p = zero(UInt64)
177-
for parent ∈ parents(op)
178-
p <<= 8
179-
p |= identifier(parent)
180-
end
181-
p
170+
p = zero(UInt128)
171+
for parent ∈ parents(op)
172+
p <<= 8
173+
p |= identifier(parent)
174+
end
175+
p
182176
end
183177
function recursively_set_parents_true!(x::Vector{Bool}, op::Operation)
184-
x[identifier(op)] && return nothing # don't redescend
185-
x[identifier(op)] = true
186-
for opp ∈ parents(op)
187-
recursively_set_parents_true!(x, opp)
188-
end
189-
return nothing
178+
x[identifier(op)] && return nothing # don't redescend
179+
x[identifier(op)] = true
180+
for opp ∈ parents(op)
181+
recursively_set_parents_true!(x, opp)
182+
end
183+
return nothing
190184
end
191185
function getroots(ls::LoopSet)::Vector{Bool}
192-
rooted = Vector{Bool}(undef, length(operations(ls)))
193-
getroots!(rooted, ls)
186+
rooted = Vector{Bool}(undef, length(operations(ls)))
187+
getroots!(rooted, ls)
194188
end
195189
function getroots!(rooted::Vector{Bool}, ls::LoopSet)
196190
fill!(rooted, false)

src/reconstruct_loopset.jl

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -106,10 +106,10 @@ function ArrayReferenceMeta(
106106
)
107107
# unpack the `ArrayRefStruct`
108108
# we don't want to specialize on it, as it is typed on symbols.
109-
index_types = (ar.index_types)::UInt64
110-
indices = (ar.indices)::UInt64
111-
offsets = (ar.offsets)::UInt64
112-
strides = (ar.strides)::UInt64
109+
index_types = (ar.index_types)::UInt128
110+
indices = (ar.indices)::UInt128
111+
offsets = (ar.offsets)::UInt128
112+
strides = (ar.strides)::UInt128
113113
arrayar, ptrar = array_and_ptr(ar)::Tuple{Symbol,Symbol}
114114
# ptrar = ptr(ar)::Symbol
115115
# TODO, check if this matters at all. The compiler still knows it is an `::ArrayRefStruct`, just not `arrayar` or `ptrar`?
@@ -119,7 +119,7 @@ function ArrayReferenceMeta(
119119
)
120120
end
121121
function ArrayReferenceMeta(
122-
ls::LoopSet, index_types::UInt64, indices::UInt64, offsets::UInt64, strides::UInt64,
122+
ls::LoopSet, index_types::UInt128, indices::UInt128, offsets::UInt128, strides::UInt128,
123123
arrayar::Symbol, ptrar::Symbol, arraysymbolinds::Vector{Symbol},
124124
opsymbols::Vector{Symbol}, nopsv::Vector{NOpsType}, expandedv::Vector{Bool}
125125
)
@@ -128,7 +128,7 @@ function ArrayReferenceMeta(
128128
offset_vec = Int8[]
129129
stride_vec = Int8[]
130130
loopedindex = Bool[]
131-
while index_types != zero(UInt64)
131+
while index_types != zero(UInt128)
132132
ind = indices % UInt8
133133
offsetᵢ = offsets % Int8
134134
strideᵢ = strides % Int8
@@ -405,8 +405,8 @@ loopdependencies(ls::LoopSet, os::OperationStruct, expand = false, offset = 0) =
405405
reduceddependencies(ls::LoopSet, os::OperationStruct, expand = false, offset = 0) = parents_symvec(ls, os.reduceddeps, expand, offset)
406406
childdependencies(ls::LoopSet, os::OperationStruct, expand = false, offset = 0) = parents_symvec(ls, os.childdeps, expand, offset)
407407

408-
# parents(ls::LoopSet, u::UInt64) = loopindexoffset(ls, u, false)
409-
parents(ls::LoopSet, u::UInt64) = loopindex(ls, u, 0x08)
408+
# parents(ls::LoopSet, u::UInt128) = loopindexoffset(ls, u, false)
409+
parents(ls::LoopSet, u::UInt128) = loopindex(ls, u, 0x08)
410410
parents(ls::LoopSet, os::OperationStruct) = parents(ls, os.parents)
411411

412412
expandedopname(opsymbol::Symbol, offset::Integer) = Symbol(String(opsymbol)*'#'*string(offset+1)*'#')

0 commit comments

Comments
 (0)