[NPUW] Add new LiftGather pattern (#30393)

smirnov-alexey · web-flow · commit f7ee437f59ae · 2025-05-08T13:28:16.000Z
diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -107,6 +107,7 @@ void pre_load_transform(const std::shared_ptr<ov::Model>& model, const ov::AnyMa
         rewr.add_matcher<ov::npuw::patterns::opt::DQLiftGatherAsymCW>();
         rewr.add_matcher<ov::npuw::patterns::opt::DQLiftGatherSymCW>();
         rewr.add_matcher<ov::npuw::patterns::opt::DQLiftGatherSymGQ>();
+        rewr.add_matcher<ov::npuw::patterns::opt::DQLiftGatherCW>();
         rewr.run_on_model(model);
     }
 
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
@@ -1045,6 +1045,39 @@ DQLiftGatherSymCW::DQLiftGatherSymCW() {
     register_matcher(std::make_shared<opp::Matcher>(gather, "DQGatherSymCW"), std::move(callback));
 }
 
+// FIXME: workaround pattern for the partitioning. Matches Gather(Convert(Constant), [Convert](Parameter), <any>)
+DQLiftGatherCW::DQLiftGatherCW() {
+    auto qweight = opp::wrap_type<ov::op::v0::Constant>();
+    auto qcvtw = opp::wrap_type<ov::op::v0::Convert>({qweight});
+
+    auto pids = opp::wrap_type<ov::op::v0::Parameter>();
+    auto cvtids = opp::optional<ov::op::v0::Convert>({pids->output(0)});  // Convert on the ids is optional
+    auto gather = opp::wrap_type<ov::op::v8::Gather>({qcvtw, cvtids, opp::any_input()});
+
+    // Note: capture by value ([=]) so the pattern nodes declared above stay alive inside the callback
+    auto callback = [=](ov::pass::pattern::Matcher& m) {
+        auto& node_to_output = m.get_pattern_value_map();
+        // Look up matched outputs; at_or_at presumably picks cvtids if it matched, else pids - verify uat helper
+        auto matched_out_w = node_to_output.at(qweight);
+        auto matched_out_ids = uat::_(node_to_output).at_or_at(cvtids, pids);
+        const auto& matched_out_gather = node_to_output.at(gather);
+
+        // Rebuild the chain on W: Convert to f16 -> Gather (scalar i32 constant 0 as axis) -> Convert to f32
+        auto new_cvt_w = std::make_shared<ov::op::v0::Convert>(matched_out_w, ov::element::f16);
+        auto gather_c = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{}, 0);
+        auto new_g_w = std::make_shared<ov::op::v8::Gather>(new_cvt_w, matched_out_ids, gather_c);
+        // NOTE(review): axis 0 and f32 output are hard-coded, not taken from the matched Gather - confirm
+        auto new_out = std::make_shared<ov::op::v0::Convert>(new_g_w, ov::element::f32);
+        // Redirect every consumer of the matched Gather output to the new f32 Convert
+        for (auto&& r : matched_out_gather.get_target_inputs()) {
+            r.replace_source_output(new_out);
+        }
+
+        return true;  // root was changed
+    };
+    register_matcher(std::make_shared<opp::Matcher>(gather, "DQGatherCW"), std::move(callback));
+}
+
 // Identify a Gather+DQ Sym GQ MatMul pattern, lift Gather up
 // Note(1): this pattern is applied on the full model before any partitioning
 // Note(2): here's a difference, the new lifted Gathers stay behind Convert(W) & Convert(S)
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.hpp
@@ -120,6 +120,12 @@ class DQLiftGatherSymGQ : public ov::pass::MatcherPass {
     DQLiftGatherSymGQ();
 };
 
+class DQLiftGatherCW : public ov::pass::MatcherPass {  // Gather rewrite pass; pattern and callback are in opt.cpp
+public:
+    OPENVINO_MATCHER_PASS_RTTI("npuw::patterns::opt::DQLiftGatherCW");
+    DQLiftGatherCW();  // registers the "DQGatherCW" matcher
+};
+
 // Head vocab unpacks
 
 class DQUnpackDictGatheru : public ov::pass::MatcherPass {

Original file line number	Diff line number	Diff line change
`@@ -107,6 +107,7 @@ void pre_load_transform(const std::shared_ptr<ov::Model>& model, const ov::AnyMa`
`107`	`107`	`rewr.add_matcher<ov::npuw::patterns::opt::DQLiftGatherAsymCW>();`
`108`	`108`	`rewr.add_matcher<ov::npuw::patterns::opt::DQLiftGatherSymCW>();`
`109`	`109`	`rewr.add_matcher<ov::npuw::patterns::opt::DQLiftGatherSymGQ>();`
	`110`	`+ rewr.add_matcher<ov::npuw::patterns::opt::DQLiftGatherCW>();`
`110`	`111`	`rewr.run_on_model(model);`
`111`	`112`	`}`
`112`	`113`