[CPU] Use state layout of B, H, V, K

zhangYiIntel · zhangYiIntel · commit bd6fe37147ad · 2026-03-04T16:10:36.000+08:00
diff --git a/src/bindings/python/src/openvino/_pyopenvino/op/__init__.pyi b/src/bindings/python/src/openvino/_pyopenvino/op/__init__.pyi
@@ -189,6 +189,7 @@ class _GatedDeltaNet(openvino._pyopenvino.Node):
     Experimental extention for GatedDeltaNet operation. Use with care: no backward compatibility is guaranteed in future releases.
     """
     def __init__(self, arg0: collections.abc.Sequence[openvino._pyopenvino.Output]) -> None:
+        ...
 class assign(openvino._pyopenvino.Node):
     """
     openvino.op.assign wraps ov::op::v6::Assign
diff --git a/src/bindings/python/src/pyopenvino/graph/ops/gated_delta_net.cpp b/src/bindings/python/src/pyopenvino/graph/ops/gated_delta_net.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2025 Intel Corporation
+// Copyright (C) 2018-2026 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/src/bindings/python/src/pyopenvino/graph/ops/gated_delta_net.hpp b/src/bindings/python/src/pyopenvino/graph/ops/gated_delta_net.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2025 Intel Corporation
+// Copyright (C) 2018-2026 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/src/common/transformations/include/transformations/common_optimizations/fuse_gated_delta_net.hpp b/src/common/transformations/include/transformations/common_optimizations/fuse_gated_delta_net.hpp
@@ -1,3 +1,6 @@
+// Copyright (C) 2018-2026 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
 #pragma once
 
 #include "openvino/pass/graph_rewrite.hpp"
diff --git a/src/common/transformations/src/transformations/common_optimizations/fuse_gated_delta_net.cpp b/src/common/transformations/src/transformations/common_optimizations/fuse_gated_delta_net.cpp
@@ -1,3 +1,7 @@
+// Copyright (C) 2018-2026 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
 #include "transformations/common_optimizations/fuse_gated_delta_net.hpp"
 
 #include <cstddef>
diff --git a/src/core/dev_api/openvino/op/gated_delta_net.hpp b/src/core/dev_api/openvino/op/gated_delta_net.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2025 Intel Corporation
+// Copyright (C) 2018-2026 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 #pragma once
diff --git a/src/core/src/op/gated_delta_net.cpp b/src/core/src/op/gated_delta_net.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2025 Intel Corporation
+// Copyright (C) 2018-2026 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -65,12 +65,9 @@ GatedDeltaNet::GatedDeltaNet(const ov::OutputVector& args) : ov::op::Op(args) {
 }
 
 void GatedDeltaNet::validate_and_infer_types() {
-    OV_OP_SCOPE(LinearAttention_validate_and_infer_types);
+    OV_OP_SCOPE(GatedDeltaNet_validate_and_infer_types);
 
-    NODE_VALIDATION_CHECK(this,
-                          get_input_size() == 6,
-                          "GatedDeltaNet expects 6 inputs, but it has ",
-                          get_input_size());
+    NODE_VALIDATION_CHECK(this, get_input_size() == 6, "GatedDeltaNet expects 6 inputs, but it has ", get_input_size());
 
     // format: Node*, input_idx, name, {rank_list}, {type_list}
     input_check(this, 0, "query", {4}, {});
@@ -82,13 +79,15 @@ void GatedDeltaNet::validate_and_infer_types() {
 
     // value head_size may be not same with key
     auto out_ps = get_input_partial_shape(2);
-    const auto&  h_ps= get_input_partial_shape(3);
+    const auto& h_ps = get_input_partial_shape(3);
     set_output_type(0, get_input_element_type(0), out_ps);
     set_output_type(1, get_input_element_type(3), h_ps);
 }
 
 std::shared_ptr<ov::Node> GatedDeltaNet::clone_with_new_inputs(const ov::OutputVector& new_args) const {
-    return std::make_shared<GatedDeltaNet>(new_args);
+    auto cloned = std::make_shared<GatedDeltaNet>(new_args);
+    cloned->m_config = m_config;
+    return cloned;
 }
 
 void GatedDeltaNet::set_out_type(int index, const ov::element::Type& output_type) {
diff --git a/src/plugins/intel_cpu/src/nodes/gated_delta_net.cpp b/src/plugins/intel_cpu/src/nodes/gated_delta_net.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2025 Intel Corporation
+// Copyright (C) 2018-2026 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -16,6 +16,7 @@
 #include "cpu_memory.h"
 #include "cpu_types.h"
 #include "graph_context.h"
+#include "kernels/linear_attn/recurrent_linear_attn.hpp"
 #include "memory_desc/cpu_memory_desc.h"
 #include "node.h"
 #include "nodes/common/blocked_desc_creator.h"
@@ -30,7 +31,6 @@
 #include "shape_inference/shape_inference_internal_dyn.hpp"
 #include "transformations/utils/utils.hpp"
 #include "utils/general_utils.h"
-#include "kernels/linear_attn/recurrent_linear_attn.hpp"
 
 using namespace ov::Extensions::Cpu;
 using namespace dnnl::impl;
@@ -55,8 +55,7 @@ void GatedDeltaNet::initSupportedPrimitiveDescriptors() {
     }
     std::vector<PortConfigurator> outPortConfigs = {
         PortConfigurator{LayoutType::ncsp, dataPrecision, getOutputShapeAtPort(0), false, -1},
-        PortConfigurator{LayoutType::ncsp, dataPrecision, getOutputShapeAtPort(1), false, -1}
-    };
+        PortConfigurator{LayoutType::ncsp, dataPrecision, getOutputShapeAtPort(1), false, -1}};
     addSupportedPrimDesc(inPortConfigs, outPortConfigs, impl_desc_type::ref_any);
 }
 
@@ -86,11 +85,27 @@ void GatedDeltaNet::execute([[maybe_unused]] const dnnl::stream& strm) {
     PlainTensor beta(inputs[5]);
     PlainTensor output_attn(outputs[0]);
     PlainTensor output_recurrent_state(outputs[1]);
-    recurrent_linear_attn(query, key, value, recurrent_state, gate, beta, output_attn, output_recurrent_state);
+    // Scratch buffer: three K-length float vectors (q, k, h) for each (B, H, V) triple.
+    const auto& q_dims = inputs[0]->getStaticDims();
+    const auto& v_dims = inputs[2]->getStaticDims();
+    const size_t B = q_dims[0];
+    const size_t H = q_dims[2];
+    const size_t K = q_dims[3];
+    const size_t V = v_dims[3];
+    temp_buffer.resize<float>({B * H * V * 3 * K});
+    recurrent_linear_attn(query,
+                          key,
+                          value,
+                          recurrent_state,
+                          gate,
+                          beta,
+                          output_attn,
+                          output_recurrent_state,
+                          temp_buffer);
 }
 
 bool GatedDeltaNet::isSupportedOperation(const std::shared_ptr<const ov::Node>& op,
-                                          std::string& errorMessage) noexcept {
+                                         std::string& errorMessage) noexcept {
     return true;
 }
 
diff --git a/src/plugins/intel_cpu/src/nodes/gated_delta_net.h b/src/plugins/intel_cpu/src/nodes/gated_delta_net.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2025 Intel Corporation
+// Copyright (C) 2018-2026 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -42,6 +42,9 @@ class GatedDeltaNet : public Node {
     void execute(const dnnl::stream& strm) override;
     void createPrimitive() override;
     static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
+
+private:
+    PlainTensor temp_buffer;
 };
 
 }  // namespace ov::intel_cpu::node
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/linear_attn/recurrent_linear_attn.cpp b/src/plugins/intel_cpu/src/nodes/kernels/linear_attn/recurrent_linear_attn.cpp
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/linear_attn/recurrent_linear_attn.hpp b/src/plugins/intel_cpu/src/nodes/kernels/linear_attn/recurrent_linear_attn.hpp
diff --git a/src/tests/functional/plugin/shared/src/subgraph/gated_delta_net.cpp b/src/tests/functional/plugin/shared/src/subgraph/gated_delta_net.cpp

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-// Copyright (C) 2018-2025 Intel Corporation`
	`1`	`+// Copyright (C) 2018-2026 Intel Corporation`
`2`	`2`	`// SPDX-License-Identifier: Apache-2.0`
`3`	`3`	`//`
`4`	`4`