@@ -89,39 +89,41 @@ function returned_ops(ls::LoopSet)
89
89
end
90
90
91
91
"""
    lower_and_split_loops(ls::LoopSet, inline::Int)

Lower `ls`, first checking whether splitting it into two loop sets has a lower estimated
cost than lowering it fused.

The split candidates are the ops reported by `returned_ops(ls)`. With at most one
candidate, `ls` is lowered directly. Otherwise each candidate is split off in turn with
`split_loopset`, and the costs reported by `choose_order_cost` for the two parts are
compared against the fused cost. The first split whose combined cost is at most 90% of
the fused cost is taken, and the result is a `:block` expression holding `ls.preamble`
followed by the two lowered parts. When more than one candidate remains in the second
part, that part is processed recursively. If no split qualifies, the fused loop set is
lowered.
"""
function lower_and_split_loops(ls::LoopSet, inline::Int)
    split_candidates = returned_ops(ls)
    length(split_candidates) > 1 || return lower(ls, inline)
    (order_fused, unrolled_fused, tiled_fused, vectorized_fused,
     U_fused, T_fused, cost_fused, shouldinline_fused) = choose_order_cost(ls)
    # Scratch buffers reused across iterations: `split_1` holds the single op being split
    # off, `remaining_ops` holds every other candidate.
    remaining_ops = Vector{Int}(undef, length(split_candidates) - 1)
    split_1 = Int[0]
    for (ind, i) ∈ enumerate(split_candidates)
        split_1[1] = i
        ls_1 = split_loopset(ls, split_1)
        (order_1, unrolled_1, tiled_1, vectorized_1,
         U_1, T_1, cost_1, shouldinline_1) = choose_order_cost(ls_1)
        # Copy all candidates except the `ind`-th into `remaining_ops`.
        remaining_ops[1:ind-1] .= @view(split_candidates[1:ind-1])
        remaining_ops[ind:end] .= @view(split_candidates[ind+1:end])
        ls_2 = split_loopset(ls, remaining_ops)
        (order_2, unrolled_2, tiled_2, vectorized_2,
         U_2, T_2, cost_2, shouldinline_2) = choose_order_cost(ls_2)
        # Only split when the combined cost is at most 90% of the fused cost.
        if cost_1 + cost_2 ≤ 0.9 * cost_fused
            ls_2_lowered = if length(remaining_ops) > 1
                # NOTE(review): a zero `inline` looks like "no decision yet", in which
                # case the first part's recommendation is adopted -- confirm against
                # `inlinedecision`.
                inline = iszero(inline) ? (shouldinline_1 % Int) : inline
                lower_and_split_loops(ls_2, inline)
            else
                doinline = inlinedecision(inline, shouldinline_1 | shouldinline_2)
                lower(ls_2, order_2, unrolled_2, tiled_2, vectorized_2, U_2, T_2, doinline)
            end
            return Expr(
                :block,
                ls.preamble,
                lower(ls_1, order_1, unrolled_1, tiled_1, vectorized_1, U_1, T_1, false),
                ls_2_lowered,
                nothing
            )
        end
        # With exactly two candidates, the second iteration would only revisit the same
        # two-way partition with the roles of the two parts swapped.
        length(split_candidates) == 2 && break
    end
    doinline = inlinedecision(inline, shouldinline_fused)
    return lower(
        ls, order_fused, unrolled_fused, tiled_fused, vectorized_fused,
        U_fused, T_fused, doinline
    )
end
126
128
127
129
0 commit comments