9
9
#include < cstddef>
10
10
#include < cstdint>
11
11
#include < iterator>
12
+ #include < map>
12
13
#include < memory>
13
- #include < tuple>
14
14
#include < vector>
15
15
16
16
#include " openvino/core/except.hpp"
33
33
namespace ov ::snippets::lowered::pass {
34
34
35
35
namespace {
36
- void connect_cloned_body_with_buffers_outside (LinearIR::constExprIt cur_begin,
37
- LinearIR::constExprIt cur_end,
38
- LinearIR::constExprIt res_begin,
39
- LinearIR::constExprIt res_end,
36
+ std::vector<LoopPort> clone_ports (const ExpressionMap& expression_map, const std::vector<LoopPort>& cur_ports) {
37
+ std::vector<LoopPort> new_ports (cur_ports.size ());
38
+ for (size_t i = 0 ; i < cur_ports.size (); ++i) {
39
+ const auto & port = cur_ports[i];
40
+ const auto & original_expr = port.get_expr_port ()->get_expr ().get ();
41
+ OPENVINO_ASSERT (expression_map.count (original_expr), " Cannot find cloned expression for: " , original_expr);
42
+ new_ports[i] = *port.clone_with_new_expr (expression_map.at (original_expr));
43
+ }
44
+ return new_ports;
45
+ }
46
+
47
+ void connect_cloned_body_with_buffers_outside (const LoopManager::LoopBounds& cur_bounds,
48
+ const LoopManager::LoopBounds& res_bounds,
40
49
LinearIR& linear_ir) {
50
+ const auto & [cur_begin, cur_end] = cur_bounds;
51
+ const auto & [res_begin, res_end] = res_bounds;
41
52
for (auto result_it = res_begin, original_it = cur_begin; result_it != res_end; ++result_it, ++original_it) {
42
53
const auto & result_expr = *result_it;
43
54
const auto & original_expr = *original_it;
@@ -136,68 +147,49 @@ size_t InsertSpecificIterations::get_decomposed_loop_increment(const UnifiedLoop
136
147
}
137
148
138
149
/**
 * @brief Clones the expressions of a loop and inserts the copy into `linear_ir` before `insert_pos`.
 *
 * The cloned range is [bounds.first, bounds.second] inclusive (the end iterator is advanced by one
 * before being handed to the builder).
 *
 * @param linear_ir      Linear IR that receives the cloned expressions
 * @param bounds         first and last expression iterators of the loop to copy
 * @param insert_pos     position before which the copy is inserted
 * @param expression_map passed through to LinearIRBuilder::clone_range; presumably filled with the
 *                       original -> cloned expression mapping (callers read it afterwards)
 * @return bounds of the inserted copy: the iterator returned by the insertion and the iterator
 *         directly preceding `insert_pos`
 */
LoopManager::LoopBounds InsertSpecificIterations::insert_copy_loop(LinearIR& linear_ir,
                                                                   const LoopManager::LoopBounds& bounds,
                                                                   const LinearIR::constExprIt& insert_pos,
                                                                   ExpressionMap& expression_map) {
    const auto& src_begin = bounds.first;
    const auto& src_end = bounds.second;

    const LinearIRBuilder::Config clone_config(false);
    // `src_end` points at the last expression of the loop, so step past it to make the range inclusive.
    const auto& cloned_exprs =
        LinearIRBuilder(clone_config).clone_range(src_begin, std::next(src_end), expression_map);

    const auto copy_begin = linear_ir.insert(insert_pos, cloned_exprs.begin(), cloned_exprs.end());
    // The copy was inserted right before `insert_pos`, so its last expression is the predecessor.
    const auto copy_end = std::prev(insert_pos);
    return {copy_begin, copy_end};
}
174
161
175
162
void InsertSpecificIterations::init_decomposed_loop (LinearIR& linear_ir,
176
- LinearIR::constExprIt begin,
177
- LinearIR::constExprIt end,
163
+ const LoopManager::LoopBounds& decomposed_loop_bounds,
178
164
const ExpandedLoopInfoPtr& decomposed_loop_info,
179
- size_t unified_loop_id,
180
- const std::shared_ptr<op::LoopEnd>& decomposed_loop_end) {
165
+ size_t loop_id_to_replace,
166
+ const std::shared_ptr<op::LoopEnd>& decomposed_loop_end,
167
+ bool run_handlers) {
181
168
const auto & loop_manager = linear_ir.get_loop_manager ();
182
- const auto new_id =
183
- loop_manager->replace_with_new_loop (linear_ir, begin, std::next (end), decomposed_loop_info, unified_loop_id);
169
+ const auto new_id = loop_manager->replace_with_new_loop (linear_ir,
170
+ decomposed_loop_bounds.first ,
171
+ std::next (decomposed_loop_bounds.second ),
172
+ decomposed_loop_info,
173
+ loop_id_to_replace);
184
174
decomposed_loop_end->set_id (new_id);
185
175
decomposed_loop_end->set_work_amount (decomposed_loop_info->get_work_amount ());
186
176
decomposed_loop_end->set_increment (decomposed_loop_info->get_increment ());
187
177
decomposed_loop_end->set_ptr_increments (decomposed_loop_info->get_ptr_increments ());
188
178
decomposed_loop_end->set_finalization_offsets (decomposed_loop_info->get_finalization_offsets ());
189
- // Note: handlers must be run on the range started with the first operation in the loop body.
190
- const auto handlers = decomposed_loop_info->get_handler_passes ();
191
- handlers.run (linear_ir, std::next (begin), end);
179
+ if (run_handlers) {
180
+ const auto handlers = decomposed_loop_info->get_handler_passes ();
181
+ // Note: handlers must be run on the range started with the first operation in the loop body.
182
+ handlers.run (linear_ir, std::next (decomposed_loop_bounds.first ), decomposed_loop_bounds.second );
183
+ }
192
184
}
193
185
194
186
bool InsertSpecificIterations::decompose (LinearIR& linear_ir,
195
187
LinearIR::constExprIt begin,
196
188
LinearIR::constExprIt end,
197
189
const std::shared_ptr<op::LoopEnd>& loop_end) {
198
- const auto loop_id = loop_end->get_id ();
190
+ const auto unified_loop_id = loop_end->get_id ();
199
191
const auto & loop_manager = linear_ir.get_loop_manager ();
200
- const auto & unified_loop_info = loop_manager->get_loop_info <UnifiedLoopInfo>(loop_id );
192
+ const auto & unified_loop_info = loop_manager->get_loop_info <UnifiedLoopInfo>(unified_loop_id );
201
193
202
194
auto remaining_work_amount = unified_loop_info->get_work_amount ();
203
195
const auto is_wa_dynamic = utils::is_dynamic_value (remaining_work_amount);
@@ -219,7 +211,7 @@ bool InsertSpecificIterations::decompose(LinearIR& linear_ir,
219
211
}
220
212
221
213
auto decomposed_loop_end = loop_end;
222
- auto decomposed_loop_begin_it = begin, decomposed_loop_end_it = end;
214
+ LoopManager::LoopBounds decomposed_loop_bounds{ begin, end} ;
223
215
auto decomposed_loop_entry_ports = unified_loop_info->get_input_ports ();
224
216
auto decomposed_loop_exit_ports = unified_loop_info->get_output_ports ();
225
217
auto decomposed_ptr_increments = unified_loop_info->get_ptr_increments ();
@@ -228,13 +220,19 @@ bool InsertSpecificIterations::decompose(LinearIR& linear_ir,
228
220
// Need to copy body if there are other specific sub-loops
229
221
// Otherwise we should update the current body
230
222
if (remaining_work_amount > 0 ) {
231
- std::tie (decomposed_loop_begin_it, decomposed_loop_end_it) =
232
- insert_copy_loop (linear_ir,
233
- loop_id,
234
- begin,
235
- decomposed_loop_entry_ports,
236
- decomposed_loop_exit_ports);
237
- decomposed_loop_end = ov::as_type_ptr<op::LoopEnd>(decomposed_loop_end_it->get ()->get_node ());
223
+ const auto cur_bounds = loop_manager->get_loop_bounds (linear_ir, unified_loop_id);
224
+ ExpressionMap expression_map;
225
+ decomposed_loop_bounds = insert_copy_loop (linear_ir, cur_bounds, begin, expression_map);
226
+
227
+ // Add connections between output of cloned bodies and Buffers from the current LinearIR
228
+ // (Buffers are connections between Loops)
229
+ connect_cloned_body_with_buffers_outside (cur_bounds, decomposed_loop_bounds, linear_ir);
230
+
231
+ const auto original_loop_info = loop_manager->get_loop_info (unified_loop_id);
232
+ decomposed_loop_entry_ports = clone_ports (expression_map, original_loop_info->get_input_ports ());
233
+ decomposed_loop_exit_ports = clone_ports (expression_map, original_loop_info->get_output_ports ());
234
+
235
+ decomposed_loop_end = ov::as_type_ptr<op::LoopEnd>(decomposed_loop_bounds.second ->get ()->get_node ());
238
236
OPENVINO_ASSERT (decomposed_loop_end, " Cloned Loop does not contain LoopEnd op at the expected place." );
239
237
240
238
// Only latest loop iterations must have summarized finalization offsets!
@@ -246,6 +244,59 @@ bool InsertSpecificIterations::decompose(LinearIR& linear_ir,
246
244
offset = 0 ;
247
245
}
248
246
});
247
+
248
+ std::map<UnifiedLoopInfoPtr, UnifiedLoopInfoPtr> unified_loop_map;
249
+ auto get_unified_cloned_info = [&unified_loop_map,
250
+ &expression_map](const ExpandedLoopInfoPtr& expanded_loop_info) {
251
+ const auto & unified_loop_info = expanded_loop_info->get_unified_loop_info ();
252
+ if (unified_loop_map.count (unified_loop_info) == 0 ) {
253
+ LoopInfoMap loop_info_map;
254
+ // Note: we must clone UnifiedLoopInfo for the cloned ExpandedLoopInfos
255
+ auto cloned_info = ov::as_type_ptr<UnifiedLoopInfo>(
256
+ unified_loop_info->clone_with_new_expr (expression_map, loop_info_map));
257
+ OPENVINO_ASSERT (cloned_info, " cloned info must be UnifiedLoopInfo" );
258
+ unified_loop_map[unified_loop_info] = cloned_info;
259
+ }
260
+ OPENVINO_ASSERT (unified_loop_map.count (unified_loop_info),
261
+ " Cloned UnifiedLoopInfo must be cloned at this stage." );
262
+ return unified_loop_map[unified_loop_info];
263
+ };
264
+
265
+ // Note: all internal decomposed loops must be also cloned to avoid a situation
266
+ // when 2 loops with the same ID exist in both specific iterations of the outer loop
267
+ for (auto it = std::next (decomposed_loop_bounds.first ); it != decomposed_loop_bounds.second ; ++it) {
268
+ auto internal_loop_end = ov::as_type_ptr<op::LoopEnd>(it->get ()->get_node ());
269
+ if (!internal_loop_end) {
270
+ continue ;
271
+ }
272
+ const auto loop_begin = internal_loop_end->get_loop_begin ();
273
+ auto begin_it = linear_ir.find_after (std::next (decomposed_loop_bounds.first ),
274
+ linear_ir.get_expr_by_node (loop_begin));
275
+ OPENVINO_ASSERT (begin_it != linear_ir.cend (),
276
+ " Cannot find LoopBegin for LoopEnd with id " ,
277
+ internal_loop_end->get_id ());
278
+ LoopManager::LoopBounds internal_loop_bounds{begin_it, it};
279
+ const auto internal_loop_id = internal_loop_end->get_id ();
280
+ // Note: internal loops must be already decomposed to ExpandedLoops
281
+ const auto internal_loop_info = loop_manager->get_loop_info <ExpandedLoopInfo>(internal_loop_id);
282
+ const auto cloned_loop_info = std::make_shared<ExpandedLoopInfo>(
283
+ internal_loop_info->get_work_amount (),
284
+ internal_loop_info->get_increment (),
285
+ clone_ports (expression_map, internal_loop_info->get_input_ports ()),
286
+ clone_ports (expression_map, internal_loop_info->get_output_ports ()),
287
+ internal_loop_info->get_ptr_increments (),
288
+ internal_loop_info->get_finalization_offsets (),
289
+ internal_loop_info->get_data_sizes (),
290
+ internal_loop_info->get_type (),
291
+ get_unified_cloned_info (internal_loop_info),
292
+ internal_loop_info->is_evaluate_once ());
293
+ init_decomposed_loop (linear_ir,
294
+ internal_loop_bounds,
295
+ cloned_loop_info,
296
+ internal_loop_id,
297
+ internal_loop_end,
298
+ false );
299
+ }
249
300
}
250
301
251
302
const auto decomposed_loop_info = std::make_shared<ExpandedLoopInfo>(work_amount,
@@ -258,11 +309,11 @@ bool InsertSpecificIterations::decompose(LinearIR& linear_ir,
258
309
iter_type,
259
310
unified_loop_info);
260
311
init_decomposed_loop (linear_ir,
261
- decomposed_loop_begin_it,
262
- decomposed_loop_end_it,
312
+ decomposed_loop_bounds,
263
313
decomposed_loop_info,
264
- loop_id,
265
- decomposed_loop_end);
314
+ unified_loop_id,
315
+ decomposed_loop_end,
316
+ true );
266
317
267
318
decomposed = true ;
268
319
}
0 commit comments