Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ void dump_partitioning(const ov::npuw::Ensemble& ens, const std::string& to) {

pugi::xml_node node = doc.append_child("ensemble");
node.append_attribute("gflops") = std::to_string(ens.gflops).data();
node.append_attribute("irregular_results") = std::to_string(ens.irregular_results).data();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like there are no tests for printing/parsing this structure; please let me know if I missed something.


pugi::xml_node part = node.append_child("partitioning");
pugi::xml_node rep;
Expand Down Expand Up @@ -83,6 +84,7 @@ void dump_partitioning(const ov::npuw::Ensemble& ens, const std::string& to) {

doc.save_file(to.data());
}

} // namespace detail

// Interface to get online partitioning from the model
Expand Down Expand Up @@ -308,6 +310,7 @@ class Compiler {

ov::npuw::Ensemble ens;
ens.gflops = 1.; // FIXME: calculate proper flops
ens.irregular_results = !m_snapshot->isRegularResultCase();

auto graph = m_snapshot->getGraph();
// Iterate in topological order
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,10 @@ std::shared_ptr<ov::Node> Group::getInitialNode() const {
return *(m_content.begin());
}

// Read-only accessor for the group's output layers.
// Returns a const reference to m_output_layers, so no copy of the set is made.
const std::unordered_set<std::shared_ptr<ov::Node>>& Group::getOutputs() const {
return m_output_layers;
}

// Registers `node` as one of this group's input layers by inserting it into m_input_layers.
// NOTE(review): m_input_layers is presumably a set like m_output_layers, in which case
// re-adding the same node is a no-op -- confirm against the member declaration.
void Group::addInput(const std::shared_ptr<ov::Node>& node) {
m_input_layers.insert(node);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ class Group : public std::enable_shared_from_this<Group> {
own::ade::NodeHandle getHandle() const;
// Note: can only be used during initial group initialization
std::shared_ptr<ov::Node> getInitialNode() const;
const std::unordered_set<std::shared_ptr<ov::Node>>& getOutputs() const;
void addInput(const std::shared_ptr<ov::Node>& node);
void addOutput(const std::shared_ptr<ov::Node>& node);
void addContent(const std::shared_ptr<ov::Node>& node);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1270,6 +1270,101 @@ void Snapshot::stripTag(const std::string& tag) {
}
}

bool Snapshot::isRegularResultCase() const {
LOG_INFO("Online partitioning: executing isRegularResultCase pass...");
LOG_BLOCK();

// This method works around an issue where the final partitioning fails the sanity check
// because of a different number of output Convert across repeated block groups.
// The issue was initially observed in a model where only the final block has an additional ov::Result consumer.
// For example, Group[0..30] has only external consumers (i.e. consumers that belong to other groups):
// OpA -> OpB(external group)
// -> OpC(external group)
// but very last Group[31] has an additional ov::Result consumer:
// OpA -> ov::Result
// -> OpB(external group)
// -> OpC(external group)
// Later, if NPUW_F16IC is set, "Partitioner::identifySubgraphs" method adds output Converts to each Group[0..30],
// but skips Group[31] due to internal implementation details.
// "Partitioner::identifySubgraphs" can't:
// - add Convert to the Group[31] because it would require adding opposite Convert for the ov::Result
// - skip adding Converts to Group[0..30] because it would break symmetry of the repeated blocks, i.e.
// in the given graph `Convert(group0) -> output -> input -> Convert(group1)` input `Convert(group1)` should
// be also eliminated
// Therefore, we disable F16IC early in such cases.

using NodeSPtr = std::shared_ptr<ov::Node>;
std::unordered_map<std::string, NodeSPtr> node_id_cache;
for (auto&& node_ptr : m_model->get_ordered_ops()) {
node_id_cache[node_ptr->get_friendly_name()] = node_ptr;
}

auto getReadersMask = [](const NodeSPtr& node_ptr) {
// each element of the vector is
// the number of ov::Result readers for the corresponding output
std::vector<int> mask;
for (auto&& output_desc : node_ptr->outputs()) {
auto readers = output_desc.get_target_inputs();
int result_count = 0;
for (auto&& r : readers) {
auto reader_node_ptr = r.get_node()->shared_from_this();
if (ov::op::util::is_output(reader_node_ptr)) {
result_count++;
}
}
mask.push_back(result_count);
}
return mask;
};

auto reptag_to_gset = repeating();
if (!reptag_to_gset.empty()) {
NPUW_ASSERT(!m_layer_matches.empty());
}

for (const auto& reptag_and_gset : reptag_to_gset) {
auto reptag = reptag_and_gset.first;
auto gset = reptag_and_gset.second;

auto matches = m_layer_matches.at(reptag->id());

if (gset.size() <= 1) {
continue;
}

auto firstGroup = *(gset.begin());
for (auto output_layer : firstGroup->getOutputs()) {
// this is the reference mask expected from all other matched layers
// in the remaining groups of the repeated block
auto expected_readers_mask = getReadersMask(output_layer);

auto this_layer_name = output_layer->get_friendly_name();
auto layer_bank_iter = std::find_if(matches.begin(), matches.end(), [&](const std::set<std::string>& lrs) {
return lrs.count(this_layer_name) > 0;
});

NPUW_ASSERT(layer_bank_iter != matches.end());
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I assume we will have at least one match if we have more than one group.


// match output layers across all groups in the repeated block
// and compare their readers mask
for (const auto& layer_name : *layer_bank_iter) {
auto layer_ptr = node_id_cache.at(layer_name);
auto actual_readers_mask = getReadersMask(layer_ptr);

if (actual_readers_mask != expected_readers_mask) {
LOG_INFO("This is NOT a regular result case. Readers mask mismatch found for "
<< layer_name << " and " << this_layer_name << " output layers.");
return false;
}
}
}
}

LOG_INFO("This is a regular result case");
LOG_INFO("DONE");
return true;
}

size_t Snapshot::getNextRepId() {
return m_current_rep_count++;
}
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ class Snapshot : public std::enable_shared_from_this<Snapshot> {

void stripTag(const std::string& tag);

// Passes to detect corner cases
bool isRegularResultCase() const;

// Utility
std::shared_ptr<own::ade::Graph> getGraph() const;
const detail::OVPortsMap& getPortsMap() const;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,10 @@ ov::npuw::Ensemble load_groups(const std::shared_ptr<ov::Model>& model, const st

LOG_INFO("Found " << repeated.size() << " different repeated block(s)");

return ov::npuw::Ensemble{get_float_attr(root, "gflops"), std::move(partitions), std::move(repeated)};
return ov::npuw::Ensemble{get_float_attr(root, "gflops"),
get_bool_attr(root, "irregular_results", false),
std::move(partitions),
std::move(repeated)};
}

class Partitioner {
Expand Down Expand Up @@ -376,7 +379,7 @@ void Partitioner::identifySubgraphs() {
LOG_INFO("Identifying subgraphs for model " << model->get_friendly_name() << "...");
LOG_BLOCK();

const bool connect_in_f16 = cfg.get<::intel_npu::NPUW_F16IC>();
const bool connect_in_f16 = cfg.get<::intel_npu::NPUW_F16IC>() && !ens.irregular_results;

using namespace ov::npuw;
std::vector<ov::npuw::Group>& partitions = ens.groups;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ struct RepeatedBlock {

struct Ensemble {
float gflops;
bool irregular_results;
std::vector<Group> groups;

// Just a map as I don't expect 100s of _different_
Expand Down
Loading
Loading