Skip to content

Commit 5259a57

Browse files
authored
Avoid inserting Convert operations for irregular ov::Result case (#33402)
### Details:
Partitioning now ignores the `::intel_npu::NPUW_F16IC` option when a repeated block has an irregular `ov::Result` consumer, so no Convert operations are inserted in that case.

### Tickets:
- E-193955
1 parent 948724c commit 5259a57

File tree

10 files changed

+340
-41
lines changed

10 files changed

+340
-41
lines changed

src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ void dump_partitioning(const ov::npuw::Ensemble& ens, const std::string& to) {
2929

3030
pugi::xml_node node = doc.append_child("ensemble");
3131
node.append_attribute("gflops") = std::to_string(ens.gflops).data();
32+
node.append_attribute("irregular_results") = std::to_string(ens.irregular_results).data();
3233

3334
pugi::xml_node part = node.append_child("partitioning");
3435
pugi::xml_node rep;
@@ -83,6 +84,7 @@ void dump_partitioning(const ov::npuw::Ensemble& ens, const std::string& to) {
8384

8485
doc.save_file(to.data());
8586
}
87+
8688
} // namespace detail
8789

8890
// Interface to get online partitioning from the model
@@ -308,6 +310,7 @@ class Compiler {
308310

309311
ov::npuw::Ensemble ens;
310312
ens.gflops = 1.; // FIXME: calculate proper flops
313+
ens.irregular_results = !m_snapshot->isRegularResultCase();
311314

312315
auto graph = m_snapshot->getGraph();
313316
// Iterate in topological order

src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,10 @@ std::shared_ptr<ov::Node> Group::getInitialNode() const {
135135
return *(m_content.begin());
136136
}
137137

138+
// Read-only accessor: hands out a const reference to m_output_layers
// (no copy is made; the reference is tied to this Group object).
const std::unordered_set<std::shared_ptr<ov::Node>>& Group::getOutputs() const {
    const auto& output_layers = m_output_layers;
    return output_layers;
}
141+
138142
// Inserts the given node into m_input_layers.
void Group::addInput(const std::shared_ptr<ov::Node>& node) {
    auto& input_layers = m_input_layers;
    input_layers.insert(node);
}

src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ class Group : public std::enable_shared_from_this<Group> {
4949
own::ade::NodeHandle getHandle() const;
5050
// Note: can only be used during initial group initialization
5151
std::shared_ptr<ov::Node> getInitialNode() const;
52+
const std::unordered_set<std::shared_ptr<ov::Node>>& getOutputs() const;
5253
void addInput(const std::shared_ptr<ov::Node>& node);
5354
void addOutput(const std::shared_ptr<ov::Node>& node);
5455
void addContent(const std::shared_ptr<ov::Node>& node);

src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1270,6 +1270,101 @@ void Snapshot::stripTag(const std::string& tag) {
12701270
}
12711271
}
12721272

1273+
bool Snapshot::isRegularResultCase() const {
1274+
LOG_INFO("Online partitioning: executing isRegularResultCase pass...");
1275+
LOG_BLOCK();
1276+
1277+
// This method works around an issue where the final partitioning fails the sanity check
1278+
// because of a different number of output Convert across repeated block groups.
1279+
// The issue was initially observed in a model where only the final block has an additional ov::Result consumer.
1280+
// For example, Group[0..30] has only external consumers (i.e. consumers that belong to other groups):
1281+
// OpA -> OpB(external group)
1282+
// -> OpC(external group)
1283+
// but very last Group[31] has an additional ov::Result consumer:
1284+
// OpA -> ov::Result
1285+
// -> OpB(external group)
1286+
// -> OpC(external group)
1287+
// Later, if NPUW_F16IC is set, "Partitioner::identifySubgraphs" method adds output Converts to each Group[0..30],
1288+
// but skips Group[31] due to internal implementation details.
1289+
// "Partitioner::identifySubgraphs" can't:
1290+
// - add Convert to the Group[31] because it would require adding opposite Convert for the ov::Result
1291+
// - skip adding Converts to Group[0..30] because it would break symmetry of the repeated blocks, i.e.
1292+
// in the given graph `Convert(group0) -> output -> input -> Convert(group1)` input `Convert(group1)` should
1293+
// be also eliminated
1294+
// Therefore, we disable F16IC early in such cases.
1295+
1296+
using NodeSPtr = std::shared_ptr<ov::Node>;
1297+
std::unordered_map<std::string, NodeSPtr> node_id_cache;
1298+
for (auto&& node_ptr : m_model->get_ordered_ops()) {
1299+
node_id_cache[node_ptr->get_friendly_name()] = node_ptr;
1300+
}
1301+
1302+
auto getReadersMask = [](const NodeSPtr& node_ptr) {
1303+
// each element of the vector is
1304+
// the number of ov::Result readers for the corresponding output
1305+
std::vector<int> mask;
1306+
for (auto&& output_desc : node_ptr->outputs()) {
1307+
auto readers = output_desc.get_target_inputs();
1308+
int result_count = 0;
1309+
for (auto&& r : readers) {
1310+
auto reader_node_ptr = r.get_node()->shared_from_this();
1311+
if (ov::op::util::is_output(reader_node_ptr)) {
1312+
result_count++;
1313+
}
1314+
}
1315+
mask.push_back(result_count);
1316+
}
1317+
return mask;
1318+
};
1319+
1320+
auto reptag_to_gset = repeating();
1321+
if (!reptag_to_gset.empty()) {
1322+
NPUW_ASSERT(!m_layer_matches.empty());
1323+
}
1324+
1325+
for (const auto& reptag_and_gset : reptag_to_gset) {
1326+
auto reptag = reptag_and_gset.first;
1327+
auto gset = reptag_and_gset.second;
1328+
1329+
auto matches = m_layer_matches.at(reptag->id());
1330+
1331+
if (gset.size() <= 1) {
1332+
continue;
1333+
}
1334+
1335+
auto firstGroup = *(gset.begin());
1336+
for (auto output_layer : firstGroup->getOutputs()) {
1337+
// this is the reference mask expected from all other matched layers
1338+
// in the remaining groups of the repeated block
1339+
auto expected_readers_mask = getReadersMask(output_layer);
1340+
1341+
auto this_layer_name = output_layer->get_friendly_name();
1342+
auto layer_bank_iter = std::find_if(matches.begin(), matches.end(), [&](const std::set<std::string>& lrs) {
1343+
return lrs.count(this_layer_name) > 0;
1344+
});
1345+
1346+
NPUW_ASSERT(layer_bank_iter != matches.end());
1347+
1348+
// match output layers across all groups in the repeated block
1349+
// and compare their readers mask
1350+
for (const auto& layer_name : *layer_bank_iter) {
1351+
auto layer_ptr = node_id_cache.at(layer_name);
1352+
auto actual_readers_mask = getReadersMask(layer_ptr);
1353+
1354+
if (actual_readers_mask != expected_readers_mask) {
1355+
LOG_INFO("This is NOT a regular result case. Readers mask mismatch found for "
1356+
<< layer_name << " and " << this_layer_name << " output layers.");
1357+
return false;
1358+
}
1359+
}
1360+
}
1361+
}
1362+
1363+
LOG_INFO("This is a regular result case");
1364+
LOG_INFO("DONE");
1365+
return true;
1366+
}
1367+
12731368
size_t Snapshot::getNextRepId() {
12741369
return m_current_rep_count++;
12751370
}

src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ class Snapshot : public std::enable_shared_from_this<Snapshot> {
5656

5757
void stripTag(const std::string& tag);
5858

59+
// Passes to detect corner cases
60+
bool isRegularResultCase() const;
61+
5962
// Utility
6063
std::shared_ptr<own::ade::Graph> getGraph() const;
6164
const detail::OVPortsMap& getPortsMap() const;

src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,10 @@ ov::npuw::Ensemble load_groups(const std::shared_ptr<ov::Model>& model, const st
182182

183183
LOG_INFO("Found " << repeated.size() << " different repeated block(s)");
184184

185-
return ov::npuw::Ensemble{get_float_attr(root, "gflops"), std::move(partitions), std::move(repeated)};
185+
return ov::npuw::Ensemble{get_float_attr(root, "gflops"),
186+
get_bool_attr(root, "irregular_results", false),
187+
std::move(partitions),
188+
std::move(repeated)};
186189
}
187190

188191
class Partitioner {
@@ -376,7 +379,7 @@ void Partitioner::identifySubgraphs() {
376379
LOG_INFO("Identifying subgraphs for model " << model->get_friendly_name() << "...");
377380
LOG_BLOCK();
378381

379-
const bool connect_in_f16 = cfg.get<::intel_npu::NPUW_F16IC>();
382+
const bool connect_in_f16 = cfg.get<::intel_npu::NPUW_F16IC>() && !ens.irregular_results;
380383

381384
using namespace ov::npuw;
382385
std::vector<ov::npuw::Group>& partitions = ens.groups;

src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ struct RepeatedBlock {
158158

159159
struct Ensemble {
160160
float gflops;
161+
bool irregular_results;
161162
std::vector<Group> groups;
162163

163164
// Just a map as I don't expect 100s of _different_

0 commit comments

Comments
 (0)