Skip to content

Commit c8973c7

Browse files
authored
[Snippets] InsertSpecificIterations lowered pass fix (#32305)
### Details:
- *`InsertSpecificIterations` is refactored to improve readability (some `using` declarations are introduced)*
- *`InsertSpecificIterations`: fixed an issue that led to non-unique loop IDs: previously, when an outer loop was cloned, the inner expanded loop infos kept the same loop ID even for the cloned parts. This PR addresses the problem by also cloning the inner loops within the cloned outer loop*
- *The changes are covered by a new lowered test*

### Tickets:
- *part of CVS-172631*
1 parent 8a7348d commit c8973c7

File tree

7 files changed

+441
-88
lines changed

7 files changed

+441
-88
lines changed

src/common/snippets/include/snippets/lowered/pass/insert_specific_iterations.hpp

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -81,32 +81,30 @@ class InsertSpecificIterations : public RangedPass {
8181
/**
8282
* @brief Make a copy of the Loop delimited by `bounds` and insert it into LinearIR before `insert_pos`
8383
* @param linear_ir target Linear IR
84-
* @param loop_id the target loop ID
84+
* @param bounds loop bounds of current loop
8585
* @param insert_pos insertion position iterator
86-
* @param new_entry_ports reference of vector with Loop input ports that will be updated after insertion
87-
* @param new_exit_ports reference of vector with Loop output ports that will be updated after insertion
86+
* @param expression_map expression map to store pairs [original_expr, new_expr]
8887
* @return LoopBounds: iterators of new LoopBegin and LoopEnd
8988
*/
9089
static LoopManager::LoopBounds insert_copy_loop(LinearIR& linear_ir,
91-
size_t loop_id,
90+
const LoopManager::LoopBounds& bounds,
9291
const LinearIR::constExprIt& insert_pos,
93-
std::vector<LoopPort>& new_entry_ports,
94-
std::vector<LoopPort>& new_exit_ports);
92+
ExpressionMap& expression_map);
9593
/**
9694
* @brief Initializes decomposed loop: update ptr arithmetic, work_amount, increment, ID
9795
* @param linear_ir target Linear IR
98-
* @param begin iterator of LoopBegin
99-
* @param end iterator of LoopEnd
96+
* @param decomposed_loop_bounds decomposed loop bounds
10097
* @param decomposed_loop_info loop info of the corresponding decomposed loop
101-
* @param unified_loop_id ID of the unified loop
98+
* @param loop_id_to_replace ID of the loop which should be replaced by the decomposed one
10299
* @param decomposed_loop_end LoopEnd of the decomposed loop
100+
* @param run_handlers flag to run handlers for the decomposed loop
103101
*/
104102
static void init_decomposed_loop(LinearIR& linear_ir,
105-
LinearIR::constExprIt begin,
106-
LinearIR::constExprIt end,
103+
const LoopManager::LoopBounds& decomposed_loop_bounds,
107104
const ExpandedLoopInfoPtr& decomposed_loop_info,
108-
size_t unified_loop_id,
109-
const std::shared_ptr<op::LoopEnd>& decomposed_loop_end);
105+
size_t loop_id_to_replace,
106+
const std::shared_ptr<op::LoopEnd>& decomposed_loop_end,
107+
bool run_handlers);
110108
};
111109

112110
} // namespace ov::snippets::lowered::pass

src/common/snippets/src/lowered/pass/insert_specific_iterations.cpp

Lines changed: 106 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99
#include <cstddef>
1010
#include <cstdint>
1111
#include <iterator>
12+
#include <map>
1213
#include <memory>
13-
#include <tuple>
1414
#include <vector>
1515

1616
#include "openvino/core/except.hpp"
@@ -33,11 +33,22 @@
3333
namespace ov::snippets::lowered::pass {
3434

3535
namespace {
36-
void connect_cloned_body_with_buffers_outside(LinearIR::constExprIt cur_begin,
37-
LinearIR::constExprIt cur_end,
38-
LinearIR::constExprIt res_begin,
39-
LinearIR::constExprIt res_end,
36+
std::vector<LoopPort> clone_ports(const ExpressionMap& expression_map, const std::vector<LoopPort>& cur_ports) {
37+
std::vector<LoopPort> new_ports(cur_ports.size());
38+
for (size_t i = 0; i < cur_ports.size(); ++i) {
39+
const auto& port = cur_ports[i];
40+
const auto& original_expr = port.get_expr_port()->get_expr().get();
41+
OPENVINO_ASSERT(expression_map.count(original_expr), "Cannot find cloned expression for: ", original_expr);
42+
new_ports[i] = *port.clone_with_new_expr(expression_map.at(original_expr));
43+
}
44+
return new_ports;
45+
}
46+
47+
void connect_cloned_body_with_buffers_outside(const LoopManager::LoopBounds& cur_bounds,
48+
const LoopManager::LoopBounds& res_bounds,
4049
LinearIR& linear_ir) {
50+
const auto& [cur_begin, cur_end] = cur_bounds;
51+
const auto& [res_begin, res_end] = res_bounds;
4152
for (auto result_it = res_begin, original_it = cur_begin; result_it != res_end; ++result_it, ++original_it) {
4253
const auto& result_expr = *result_it;
4354
const auto& original_expr = *original_it;
@@ -136,68 +147,49 @@ size_t InsertSpecificIterations::get_decomposed_loop_increment(const UnifiedLoop
136147
}
137148

138149
LoopManager::LoopBounds InsertSpecificIterations::insert_copy_loop(LinearIR& linear_ir,
139-
const size_t loop_id,
150+
const LoopManager::LoopBounds& bounds,
140151
const LinearIR::constExprIt& insert_pos,
141-
std::vector<LoopPort>& new_entry_ports,
142-
std::vector<LoopPort>& new_exit_ports) {
143-
const auto& loop_manager = linear_ir.get_loop_manager();
144-
const auto [loop_begin_pos, loop_end_pos] = loop_manager->get_loop_bounds(linear_ir, loop_id);
145-
146-
ExpressionMap expression_map;
152+
ExpressionMap& expression_map) {
153+
const auto& [loop_begin_pos, loop_end_pos] = bounds;
147154
const auto& cloning_config = LinearIRBuilder::Config(false);
148155
const auto& loop_copy_range =
149156
LinearIRBuilder(cloning_config).clone_range(loop_begin_pos, std::next(loop_end_pos), expression_map);
150157
const auto new_loop_begin_pos = linear_ir.insert(insert_pos, loop_copy_range.begin(), loop_copy_range.end());
151158
const auto new_loop_end_pos = std::prev(insert_pos);
152-
153-
// Add connections between output of cloned bodies and Buffers from the current LinearIR (Buffers are connections
154-
// between Loops)
155-
connect_cloned_body_with_buffers_outside(loop_begin_pos,
156-
loop_end_pos,
157-
new_loop_begin_pos,
158-
new_loop_end_pos,
159-
linear_ir);
160-
161-
auto clone_ports = [&expression_map](const std::vector<LoopPort>& ports, std::vector<LoopPort>& new_ports) {
162-
new_ports.resize(ports.size());
163-
for (size_t i = 0; i < ports.size(); ++i) {
164-
const auto& port = ports[i];
165-
new_ports[i] = *port.clone_with_new_expr(expression_map[port.get_expr_port()->get_expr().get()]);
166-
}
167-
};
168-
const auto original_loop_info = loop_manager->get_loop_info(loop_id);
169-
clone_ports(original_loop_info->get_input_ports(), new_entry_ports);
170-
clone_ports(original_loop_info->get_output_ports(), new_exit_ports);
171-
172159
return {new_loop_begin_pos, new_loop_end_pos};
173160
}
174161

175162
void InsertSpecificIterations::init_decomposed_loop(LinearIR& linear_ir,
176-
LinearIR::constExprIt begin,
177-
LinearIR::constExprIt end,
163+
const LoopManager::LoopBounds& decomposed_loop_bounds,
178164
const ExpandedLoopInfoPtr& decomposed_loop_info,
179-
size_t unified_loop_id,
180-
const std::shared_ptr<op::LoopEnd>& decomposed_loop_end) {
165+
size_t loop_id_to_replace,
166+
const std::shared_ptr<op::LoopEnd>& decomposed_loop_end,
167+
bool run_handlers) {
181168
const auto& loop_manager = linear_ir.get_loop_manager();
182-
const auto new_id =
183-
loop_manager->replace_with_new_loop(linear_ir, begin, std::next(end), decomposed_loop_info, unified_loop_id);
169+
const auto new_id = loop_manager->replace_with_new_loop(linear_ir,
170+
decomposed_loop_bounds.first,
171+
std::next(decomposed_loop_bounds.second),
172+
decomposed_loop_info,
173+
loop_id_to_replace);
184174
decomposed_loop_end->set_id(new_id);
185175
decomposed_loop_end->set_work_amount(decomposed_loop_info->get_work_amount());
186176
decomposed_loop_end->set_increment(decomposed_loop_info->get_increment());
187177
decomposed_loop_end->set_ptr_increments(decomposed_loop_info->get_ptr_increments());
188178
decomposed_loop_end->set_finalization_offsets(decomposed_loop_info->get_finalization_offsets());
189-
// Note: handlers must be run on the range started with the first operation in the loop body.
190-
const auto handlers = decomposed_loop_info->get_handler_passes();
191-
handlers.run(linear_ir, std::next(begin), end);
179+
if (run_handlers) {
180+
const auto handlers = decomposed_loop_info->get_handler_passes();
181+
// Note: handlers must be run on the range started with the first operation in the loop body.
182+
handlers.run(linear_ir, std::next(decomposed_loop_bounds.first), decomposed_loop_bounds.second);
183+
}
192184
}
193185

194186
bool InsertSpecificIterations::decompose(LinearIR& linear_ir,
195187
LinearIR::constExprIt begin,
196188
LinearIR::constExprIt end,
197189
const std::shared_ptr<op::LoopEnd>& loop_end) {
198-
const auto loop_id = loop_end->get_id();
190+
const auto unified_loop_id = loop_end->get_id();
199191
const auto& loop_manager = linear_ir.get_loop_manager();
200-
const auto& unified_loop_info = loop_manager->get_loop_info<UnifiedLoopInfo>(loop_id);
192+
const auto& unified_loop_info = loop_manager->get_loop_info<UnifiedLoopInfo>(unified_loop_id);
201193

202194
auto remaining_work_amount = unified_loop_info->get_work_amount();
203195
const auto is_wa_dynamic = utils::is_dynamic_value(remaining_work_amount);
@@ -219,7 +211,7 @@ bool InsertSpecificIterations::decompose(LinearIR& linear_ir,
219211
}
220212

221213
auto decomposed_loop_end = loop_end;
222-
auto decomposed_loop_begin_it = begin, decomposed_loop_end_it = end;
214+
LoopManager::LoopBounds decomposed_loop_bounds{begin, end};
223215
auto decomposed_loop_entry_ports = unified_loop_info->get_input_ports();
224216
auto decomposed_loop_exit_ports = unified_loop_info->get_output_ports();
225217
auto decomposed_ptr_increments = unified_loop_info->get_ptr_increments();
@@ -228,13 +220,19 @@ bool InsertSpecificIterations::decompose(LinearIR& linear_ir,
228220
// Need to copy body if there are other specific sub-loops
229221
// Otherwise we should update the current body
230222
if (remaining_work_amount > 0) {
231-
std::tie(decomposed_loop_begin_it, decomposed_loop_end_it) =
232-
insert_copy_loop(linear_ir,
233-
loop_id,
234-
begin,
235-
decomposed_loop_entry_ports,
236-
decomposed_loop_exit_ports);
237-
decomposed_loop_end = ov::as_type_ptr<op::LoopEnd>(decomposed_loop_end_it->get()->get_node());
223+
const auto cur_bounds = loop_manager->get_loop_bounds(linear_ir, unified_loop_id);
224+
ExpressionMap expression_map;
225+
decomposed_loop_bounds = insert_copy_loop(linear_ir, cur_bounds, begin, expression_map);
226+
227+
// Add connections between output of cloned bodies and Buffers from the current LinearIR
228+
// (Buffers are connections between Loops)
229+
connect_cloned_body_with_buffers_outside(cur_bounds, decomposed_loop_bounds, linear_ir);
230+
231+
const auto original_loop_info = loop_manager->get_loop_info(unified_loop_id);
232+
decomposed_loop_entry_ports = clone_ports(expression_map, original_loop_info->get_input_ports());
233+
decomposed_loop_exit_ports = clone_ports(expression_map, original_loop_info->get_output_ports());
234+
235+
decomposed_loop_end = ov::as_type_ptr<op::LoopEnd>(decomposed_loop_bounds.second->get()->get_node());
238236
OPENVINO_ASSERT(decomposed_loop_end, "Cloned Loop does not contain LoopEnd op at the expected place.");
239237

240238
// Only latest loop iterations must have summarized finalization offsets!
@@ -246,6 +244,59 @@ bool InsertSpecificIterations::decompose(LinearIR& linear_ir,
246244
offset = 0;
247245
}
248246
});
247+
248+
std::map<UnifiedLoopInfoPtr, UnifiedLoopInfoPtr> unified_loop_map;
249+
auto get_unified_cloned_info = [&unified_loop_map,
250+
&expression_map](const ExpandedLoopInfoPtr& expanded_loop_info) {
251+
const auto& unified_loop_info = expanded_loop_info->get_unified_loop_info();
252+
if (unified_loop_map.count(unified_loop_info) == 0) {
253+
LoopInfoMap loop_info_map;
254+
// Note: we must clone UnifiedLoopInfo for the cloned ExpandedLoopInfos
255+
auto cloned_info = ov::as_type_ptr<UnifiedLoopInfo>(
256+
unified_loop_info->clone_with_new_expr(expression_map, loop_info_map));
257+
OPENVINO_ASSERT(cloned_info, "cloned info must be UnifiedLoopInfo");
258+
unified_loop_map[unified_loop_info] = cloned_info;
259+
}
260+
OPENVINO_ASSERT(unified_loop_map.count(unified_loop_info),
261+
"Cloned UnifiedLoopInfo must be cloned at this stage.");
262+
return unified_loop_map[unified_loop_info];
263+
};
264+
265+
// Note: all internal decomposed loops must also be cloned to avoid a situation
266+
// when 2 loops with the same ID exist in both specific iterations of the outer loop
267+
for (auto it = std::next(decomposed_loop_bounds.first); it != decomposed_loop_bounds.second; ++it) {
268+
auto internal_loop_end = ov::as_type_ptr<op::LoopEnd>(it->get()->get_node());
269+
if (!internal_loop_end) {
270+
continue;
271+
}
272+
const auto loop_begin = internal_loop_end->get_loop_begin();
273+
auto begin_it = linear_ir.find_after(std::next(decomposed_loop_bounds.first),
274+
linear_ir.get_expr_by_node(loop_begin));
275+
OPENVINO_ASSERT(begin_it != linear_ir.cend(),
276+
"Cannot find LoopBegin for LoopEnd with id ",
277+
internal_loop_end->get_id());
278+
LoopManager::LoopBounds internal_loop_bounds{begin_it, it};
279+
const auto internal_loop_id = internal_loop_end->get_id();
280+
// Note: internal loops must be already decomposed to ExpandedLoops
281+
const auto internal_loop_info = loop_manager->get_loop_info<ExpandedLoopInfo>(internal_loop_id);
282+
const auto cloned_loop_info = std::make_shared<ExpandedLoopInfo>(
283+
internal_loop_info->get_work_amount(),
284+
internal_loop_info->get_increment(),
285+
clone_ports(expression_map, internal_loop_info->get_input_ports()),
286+
clone_ports(expression_map, internal_loop_info->get_output_ports()),
287+
internal_loop_info->get_ptr_increments(),
288+
internal_loop_info->get_finalization_offsets(),
289+
internal_loop_info->get_data_sizes(),
290+
internal_loop_info->get_type(),
291+
get_unified_cloned_info(internal_loop_info),
292+
internal_loop_info->is_evaluate_once());
293+
init_decomposed_loop(linear_ir,
294+
internal_loop_bounds,
295+
cloned_loop_info,
296+
internal_loop_id,
297+
internal_loop_end,
298+
false);
299+
}
249300
}
250301

251302
const auto decomposed_loop_info = std::make_shared<ExpandedLoopInfo>(work_amount,
@@ -258,11 +309,11 @@ bool InsertSpecificIterations::decompose(LinearIR& linear_ir,
258309
iter_type,
259310
unified_loop_info);
260311
init_decomposed_loop(linear_ir,
261-
decomposed_loop_begin_it,
262-
decomposed_loop_end_it,
312+
decomposed_loop_bounds,
263313
decomposed_loop_info,
264-
loop_id,
265-
decomposed_loop_end);
314+
unified_loop_id,
315+
decomposed_loop_end,
316+
true);
266317

267318
decomposed = true;
268319
}

0 commit comments

Comments
 (0)