@@ -7,13 +7,23 @@ isdense(::Type{<:DenseArray}) = true
7
7
memload
8
8
memstore
9
9
reduction
10
+ compute
10
11
end
11
12
12
13
13
14
struct Operation
14
- outtype :: DataType
15
+ elementbytes :: Int
15
16
instruction:: Symbol
16
17
node_type:: NodeType
18
+ parents:: Vector{Operation}
19
+ children:: Vector{Operation}
20
+ metadata:: Vector{Float64}
21
+ function Operation (elementbytes, instruction, node_type)
22
+ new (
23
+ elementbytes, instruction, node_type,
24
+ Operation[], Operation[], Float64[]
25
+ )
26
+ end
17
27
end
18
28
19
29
isreduction (op:: Operation ) = op. node_type == reduction
@@ -43,6 +53,11 @@ function Base.hash(x::ShortVector, h::UInt)
43
53
h
44
54
end
45
55
56
+ function stride (op:: Operation , sym:: Symbol )
57
+ @assert accesses_memory (op) " This operation does not access memory!"
58
+ # access stride info?
59
+ end
60
+ function
46
61
47
62
struct Node
48
63
type:: DataType
@@ -107,9 +122,7 @@ function evaluate_cost_unroll(
107
122
total_cost = 0.0
108
123
iter = 1.0
109
124
# Need to check if fusion is possible
110
- W, Wshift = VectorizationBase. pick_vector_width_shift (length (ls, unrolled), biggest_type (ls)):: Tuple{Int,Int}
111
-
112
- fused_with_previous = fill (false , length (order))
125
+ # W, Wshift = VectorizationBase.pick_vector_width_shift(length(ls, unrolled), biggest_type(ls))::Tuple{Int,Int}
113
126
for itersym ∈ order
114
127
# Add to set of defined symbles
115
128
push! (nested_loop_syms, itersym)
@@ -119,8 +132,7 @@ function evaluate_cost_unroll(
119
132
end
120
133
iter *= liter
121
134
# check which vars we can define at this level of loop nest
122
- added_vars = 0
123
- for (var,instruction) ∈ variables (ls)
135
+ for var ∈ variables (ls)
124
136
# won't define if already defined...
125
137
sym (var) ∈ included_vars && continue
126
138
# it must also be a subset of defined symbols
@@ -131,9 +143,6 @@ function evaluate_cost_unroll(
131
143
total_cost += iter * cost (var, W, Wshift, unrolled, liter)
132
144
total_cost > max_cost && return total_cost # abort
133
145
end
134
- if added_vars == 0
135
- # Then it is worth checking if we can fuse with previous
136
- end
137
146
end
138
147
end
139
148
function evaluate_cost_tile (
0 commit comments