openvinotoolkit
diff --git a/‎src/common/transformations/include/transformations/common_optimizations/fuse_gated_delta_net.hpp‎
Lines changed: 2 additions & 4 deletions b/‎src/common/transformations/include/transformations/common_optimizations/fuse_gated_delta_net.hpp‎
Lines changed: 2 additions & 4 deletions
diff --git a/‎src/common/transformations/src/transformations/common_optimizations/fuse_gated_delta_net.cpp‎
Lines changed: 133 additions & 140 deletions b/‎src/common/transformations/src/transformations/common_optimizations/fuse_gated_delta_net.cpp‎
Lines changed: 133 additions & 140 deletions
diff --git a/‎src/common/transformations/tests/common_optimizations/fuse_gated_delta_net.cpp‎
Lines changed: 46 additions & 42 deletions b/‎src/common/transformations/tests/common_optimizations/fuse_gated_delta_net.cpp‎
Lines changed: 46 additions & 42 deletions
diff --git a/‎src/core/dev_api/openvino/op/gated_delta_net.hpp‎
Lines changed: 2 additions & 1 deletion b/‎src/core/dev_api/openvino/op/gated_delta_net.hpp‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎src/core/src/op/gated_delta_net.cpp‎
Lines changed: 9 additions & 9 deletions b/‎src/core/src/op/gated_delta_net.cpp‎
Lines changed: 9 additions & 9 deletions
diff --git a/‎src/core/tests/type_prop/gated_delta_net.cpp‎
Lines changed: 164 additions & 0 deletions b/‎src/core/tests/type_prop/gated_delta_net.cpp‎
Lines changed: 164 additions & 0 deletions
@@ -6,8 +6,7 @@
 #include "openvino/pass/graph_rewrite.hpp"
 #include "transformations_visibility.hpp"
 
-namespace ov {
-namespace pass {
+namespace ov::pass {
 
 /**
  * @ingroup ov_transformation_common_api
@@ -88,5 +87,4 @@ class TRANSFORMATIONS_API GatedDeltaNetFusion : public ov::pass::ModelPass {
     bool run_on_model(const std::shared_ptr<ov::Model>& model) override;
 };
 
-}  // namespace pass
-}  // namespace ov
+}  // namespace ov::pass
@@ -6,6 +6,7 @@
 
 #include <gtest/gtest.h>
 
+#include <climits>
 #include <memory>
 
 #include "common_test_utils/ov_test_utils.hpp"
@@ -32,6 +33,7 @@
 #include "openvino/op/subtract.hpp"
 #include "openvino/op/transpose.hpp"
 #include "openvino/op/unsqueeze.hpp"
+#include "transformations/convert_precision.hpp"
 
 using namespace testing;
 using namespace ov;
@@ -42,23 +44,13 @@ std::shared_ptr<ov::Model> build_looped_gdn(int32_t batch,
                                             int32_t seq_len,
                                             int32_t qk_head_num,
                                             int32_t v_head_num,
-                                            int32_t head_size) {
+                                            int32_t qk_head_size,
+                                            int32_t v_head_size) {
     const auto dtype = ov::element::f32;
-    const ov::Shape qk_shape{static_cast<size_t>(batch),
-                             static_cast<size_t>(seq_len),
-                             static_cast<size_t>(qk_head_num),
-                             static_cast<size_t>(head_size)};
-    const ov::Shape v_tensor_shape{static_cast<size_t>(batch),
-                                   static_cast<size_t>(seq_len),
-                                   static_cast<size_t>(v_head_num),
-                                   static_cast<size_t>(head_size)};
-    const ov::Shape gv_shape{static_cast<size_t>(batch),
-                             static_cast<size_t>(seq_len),
-                             static_cast<size_t>(qk_head_num)};
-    const ov::Shape h_shape{static_cast<size_t>(batch),
-                            static_cast<size_t>(qk_head_num),
-                            static_cast<size_t>(head_size),
-                            static_cast<size_t>(head_size)};
+    const ov::PartialShape qk_shape{batch, seq_len, qk_head_num, qk_head_size};
+    const ov::PartialShape v_tensor_shape{batch, seq_len, v_head_num, v_head_size};
+    const ov::PartialShape gv_shape{batch, seq_len, qk_head_num};
+    const ov::PartialShape h_shape{batch, qk_head_num, qk_head_size, v_head_size};
 
     auto q = std::make_shared<ov::op::v0::Parameter>(dtype, qk_shape);
     auto k = std::make_shared<ov::op::v0::Parameter>(dtype, qk_shape);
@@ -178,16 +170,15 @@ std::shared_ptr<ov::Model> build_looped_gdn(int32_t batch,
     auto reduce_axis0 = ov::op::v0::Constant::create(ov::element::i64, {1}, {0});
     auto core_numel = std::make_shared<ov::op::v1::ReduceProd>(core_shape, reduce_axis0, true);
     auto state_shape = std::make_shared<ov::op::v3::ShapeOf>(h0);
-    auto state_numel = std::make_shared<ov::op::v1::ReduceProd>(state_shape, reduce_axis0, true);
-    auto state_slice_end = std::make_shared<ov::op::v1::Add>(core_numel, state_numel);
     auto slice_start = ov::op::v0::Constant::create(ov::element::i64, {1}, {0});
     auto slice_step = ov::op::v0::Constant::create(ov::element::i64, {1}, {1});
     auto slice_axis = ov::op::v0::Constant::create(ov::element::i64, {1}, {0});
+    auto slice_end_inf = ov::op::v0::Constant::create(ov::element::i64, {1}, {LLONG_MAX});
 
     auto core_slice =
         std::make_shared<ov::op::v8::Slice>(packed_loop_outputs, slice_start, core_numel, slice_step, slice_axis);
     auto state_slice =
-        std::make_shared<ov::op::v8::Slice>(packed_loop_outputs, core_numel, state_slice_end, slice_step, slice_axis);
+        std::make_shared<ov::op::v8::Slice>(packed_loop_outputs, core_numel, slice_end_inf, slice_step, slice_axis);
 
     auto core_restored = std::make_shared<ov::op::v1::Reshape>(core_slice, core_shape, false);
     auto state_restored = std::make_shared<ov::op::v1::Reshape>(state_slice, state_shape, false);
@@ -204,23 +195,13 @@ std::shared_ptr<ov::Model> build_fused_gdn_ref(int32_t batch,
                                                int32_t seq_len,
                                                int32_t qk_head_num,
                                                int32_t v_head_num,
-                                               int32_t head_size) {
-    const auto dtype = ov::element::f32;
-    const ov::Shape qk_shape{static_cast<size_t>(batch),
-                             static_cast<size_t>(seq_len),
-                             static_cast<size_t>(qk_head_num),
-                             static_cast<size_t>(head_size)};
-    const ov::Shape v_tensor_shape{static_cast<size_t>(batch),
-                                   static_cast<size_t>(seq_len),
-                                   static_cast<size_t>(v_head_num),
-                                   static_cast<size_t>(head_size)};
-    const ov::Shape gv_shape{static_cast<size_t>(batch),
-                             static_cast<size_t>(seq_len),
-                             static_cast<size_t>(qk_head_num)};
-    const ov::Shape h_shape{static_cast<size_t>(batch),
-                            static_cast<size_t>(qk_head_num),
-                            static_cast<size_t>(head_size),
-                            static_cast<size_t>(head_size)};
+                                               int32_t qk_head_size,
+                                               int32_t v_head_size,
+                                               ov::element::Type dtype = ov::element::f32) {
+    const ov::PartialShape qk_shape{batch, seq_len, qk_head_num, qk_head_size};
+    const ov::PartialShape v_tensor_shape{batch, seq_len, v_head_num, v_head_size};
+    const ov::PartialShape gv_shape{batch, seq_len, qk_head_num};
+    const ov::PartialShape h_shape{batch, qk_head_num, qk_head_size, v_head_size};
 
     auto q = std::make_shared<ov::op::v0::Parameter>(dtype, qk_shape);
     auto k = std::make_shared<ov::op::v0::Parameter>(dtype, qk_shape);
@@ -233,7 +214,8 @@ std::shared_ptr<ov::Model> build_fused_gdn_ref(int32_t batch,
     ov::op::GatedDeltaNet::Config cfg;
     cfg.fuse_qk_l2norm = true;
     cfg.fuse_q_scale = true;
-    cfg.l2_norm_eps = 1e-6F;
+    cfg.q_l2_norm_eps = 1e-6F;
+    cfg.k_l2_norm_eps = 1e-6F;
     gdn->set_config(cfg);
 
     return std::make_shared<ov::Model>(ov::OutputVector{gdn->output(0), gdn->output(1)},
@@ -245,14 +227,36 @@ std::shared_ptr<ov::Model> build_fused_gdn_ref(int32_t batch,
 TEST_F(TransformationTestsF, GatedDeltaNetFusion_BuildLoopedGDNMode) {
     disable_rt_info_check();
     disable_result_friendly_names_check();
-    constexpr int32_t batch = 2;
-    constexpr int32_t seq_len = 5;
+    constexpr int32_t batch = -1;    // -1: dynamic dimension once the builders place it in ov::PartialShape
+    constexpr int32_t seq_len = -1;  // -1: dynamic dimension as well
     constexpr int32_t qk_head_num = 4;
     constexpr int32_t v_head_num = 4;
-    constexpr int32_t head_size = 8;
+    constexpr int32_t qk_head_size = 8;  // head size shared by query and key
+    constexpr int32_t v_head_size = 16;  // value head size, distinct from the q/k head size
 
-    model = build_looped_gdn(batch, seq_len, qk_head_num, v_head_num, head_size);
+    model = build_looped_gdn(batch, seq_len, qk_head_num, v_head_num, qk_head_size, v_head_size);
     manager.register_pass<ov::pass::GatedDeltaNetFusion>();
+    model_ref = build_fused_gdn_ref(batch, seq_len, qk_head_num, v_head_num, qk_head_size, v_head_size);
+}
 
-    model_ref = build_fused_gdn_ref(batch, seq_len, qk_head_num, v_head_num, head_size);
+TEST_F(TransformationTestsF, GatedDeltaNetFusion_BuildLoopedGDNMode_F16) {
+    disable_rt_info_check();
+    disable_result_friendly_names_check();
+    constexpr int32_t batch = -1;    // -1: dynamic dimension once the builders place it in ov::PartialShape
+    constexpr int32_t seq_len = -1;  // -1: dynamic dimension as well
+    constexpr int32_t qk_head_num = 4;
+    constexpr int32_t v_head_num = 4;
+    constexpr int32_t qk_head_size = 8;  // head size shared by query and key
+    constexpr int32_t v_head_size = 16;  // value head size, distinct from the q/k head size
+
+    model = build_looped_gdn(batch, seq_len, qk_head_num, v_head_num, qk_head_size, v_head_size);
+    manager.register_pass<pass::ConvertPrecision>(ov::element::f32,  // precision to convert from
+                                                  ov::element::f16,  // precision to convert to
+                                                  type_to_fuse_map{},  // no extra per-op fuse callbacks
+                                                  true,  // NOTE(review): presumably keep_precision_sensitive_in_fp32 - confirm
+                                                  true,  // NOTE(review): presumably convert_input_output_precision - confirm
+                                                  false);  // NOTE(review): presumably store_original_precision_as_rt_attribute - confirm
+    manager.register_pass<ov::pass::GatedDeltaNetFusion>();  // registered after ConvertPrecision
+    model_ref =  // reference model is built with f16 Parameters
+        build_fused_gdn_ref(batch, seq_len, qk_head_num, v_head_num, qk_head_size, v_head_size, ov::element::f16);
 }
@@ -18,7 +18,8 @@ class OPENVINO_API GatedDeltaNet : public ov::op::Op {
     struct Config {
         bool fuse_qk_l2norm = false;
         bool fuse_q_scale = false;
-        float l2_norm_eps = 1e-6F;
+        float q_l2_norm_eps = 1e-6F;  // visited as "q_l2_norm_eps"; presumably the query L2-norm epsilon - confirm
+        float k_l2_norm_eps = 1e-6F;  // visited as "k_l2_norm_eps"; presumably the key L2-norm epsilon - confirm
     };
     GatedDeltaNet(const ov::OutputVector& args);
     void validate_and_infer_types() override;
 
@@ -12,8 +12,6 @@
 namespace {
 
 // Validates input rank and type for a node input.
-// We consider that dynamic rank/type are always valid case.
-// Empty {} means any rank/type
 inline void input_check(const ov::Node* node,
                         size_t idx,
                         const std::string_view input_name,
@@ -90,19 +88,20 @@ void GatedDeltaNet::validate_and_infer_types() {
     const auto v_head_num = value_ps[2];
 
     const auto k_head_size = key_ps[3];
+    const auto q_head_size = query_ps[3];
     const auto v_head_size = value_ps[3];
 
     NODE_VALIDATION_CHECK(this,
-                          q_head_num.compatible(k_head_num),
-                          "The number of heads in query and key should be the same, but got ",
+                          q_head_num.compatible(k_head_num) && q_head_num.compatible(v_head_num),
+                          "The number of heads in query key and value should be the same, but got ",
                           q_head_num,
                           " and ",
                           k_head_num,
                           ".");
 
     NODE_VALIDATION_CHECK(this,
-                          k_head_size.compatible(v_head_size),
-                          "The head size in key and value should be the same, but got ",
+                          k_head_size.compatible(q_head_size),  // only q/k must agree; value head size may differ
+                          "The head size in key and query should be the same, but got ",
                           k_head_size,
                           " and ",
-                          v_head_size,
+                          q_head_size,
@@ -112,8 +111,8 @@ void GatedDeltaNet::validate_and_infer_types() {
     const auto beta_head_num = beta_ps[2];
 
     NODE_VALIDATION_CHECK(this,
-                          gate_head_num.compatible(beta_head_num),
-                          "The number of heads in gate and beta should be the same, but got ",
+                          gate_head_num.compatible(beta_head_num) && gate_head_num.compatible(q_head_num),
+                          "The number of heads in gate, beta, and query should be the same, but got ",
                           gate_head_num,
                           " and ",
                           beta_head_num,
@@ -155,7 +154,8 @@ bool GatedDeltaNet::visit_attributes(AttributeVisitor& visitor) {
     visitor.start_structure("config");
     visitor.on_attribute("fuse_qk_l2norm", m_config.fuse_qk_l2norm);
     visitor.on_attribute("fuse_q_scale", m_config.fuse_q_scale);
-    visitor.on_attribute("l2_norm_eps", m_config.l2_norm_eps);
+    visitor.on_attribute("q_l2_norm_eps", m_config.q_l2_norm_eps);
+    visitor.on_attribute("k_l2_norm_eps", m_config.k_l2_norm_eps);
     visitor.finish_structure();
     return true;
 }
 
@@ -0,0 +1,164 @@
+// Copyright (C) 2018-2026 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "openvino/op/gated_delta_net.hpp"
+
+#include <gtest/gtest.h>
+
+#include "common_test_utils/test_assertions.hpp"
+#include "openvino/openvino.hpp"
+
+using namespace ov;
+using namespace testing;
+
+namespace {
+
+std::shared_ptr<op::GatedDeltaNet> make_gdn(const element::Type& et,      // element type used for all six inputs
+                                            const PartialShape& q,        // query shape
+                                            const PartialShape& k,        // key shape
+                                            const PartialShape& v,        // value shape
+                                            const PartialShape& state,    // recurrent state shape
+                                            const PartialShape& gate,     // gate shape
+                                            const PartialShape& beta) {   // beta shape
+    auto query = std::make_shared<op::v0::Parameter>(et, q);  // test helper: one Parameter per op input
+    auto key = std::make_shared<op::v0::Parameter>(et, k);
+    auto value = std::make_shared<op::v0::Parameter>(et, v);
+    auto recurrent_state = std::make_shared<op::v0::Parameter>(et, state);
+    auto gate_p = std::make_shared<op::v0::Parameter>(et, gate);
+    auto beta_p = std::make_shared<op::v0::Parameter>(et, beta);
+
+    return std::make_shared<op::GatedDeltaNet>(OutputVector{query, key, value, recurrent_state, gate_p, beta_p});
+}  // a NodeValidationFailure raised while building the op propagates to the caller (negative tests rely on it)
+
+}  // namespace
+
+TEST(type_prop, gated_delta_net_static_f32) {  // static f32 inputs: checks output count, types and shapes
+    const auto op = make_gdn(element::f32,
+                             Shape{2, 5, 4, 8},   // query
+                             Shape{2, 5, 4, 8},   // key
+                             Shape{2, 5, 4, 16},  // value
+                             Shape{2, 4, 8, 16},  // recurrent state
+                             Shape{2, 5, 4},      // gate
+                             Shape{2, 5, 4});     // beta
+
+    EXPECT_EQ(op->get_output_size(), 2);
+    EXPECT_EQ(op->get_output_element_type(0), element::f32);
+    EXPECT_EQ(op->get_output_element_type(1), element::f32);
+    EXPECT_EQ(op->get_output_partial_shape(0), PartialShape(Shape{2, 5, 4, 16}));  // equals the value shape above
+    EXPECT_EQ(op->get_output_partial_shape(1), PartialShape(Shape{2, 4, 8, 16}));  // equals the state shape above
+}
+
+TEST(type_prop, gated_delta_net_static_f16) {  // same static shapes as the f32 case, with f16 inputs
+    const auto op = make_gdn(element::f16,
+                             Shape{2, 5, 4, 8},   // query
+                             Shape{2, 5, 4, 8},   // key
+                             Shape{2, 5, 4, 16},  // value
+                             Shape{2, 4, 8, 16},  // recurrent state
+                             Shape{2, 5, 4},      // gate
+                             Shape{2, 5, 4});     // beta
+
+    EXPECT_EQ(op->get_output_element_type(0), element::f16);
+    EXPECT_EQ(op->get_output_element_type(1), element::f16);
+    EXPECT_EQ(op->get_output_partial_shape(0), PartialShape(Shape{2, 5, 4, 16}));  // equals the value shape above
+    EXPECT_EQ(op->get_output_partial_shape(1), PartialShape(Shape{2, 4, 8, 16}));  // equals the state shape above
+}
+
+TEST(type_prop, gated_delta_net_partial_shape_infer) {  // bf16 inputs with interval and -1 dimensions
+    const auto op = make_gdn(element::bf16,
+                             PartialShape{{1, 4}, -1, {2, 8}, 64},         // query
+                             PartialShape{{1, 4}, -1, {2, 8}, 64},         // key
+                             PartialShape{{1, 4}, -1, {2, 8}, {32, 128}},  // value
+                             PartialShape{{1, 4}, {2, 8}, 64, {32, 128}},  // recurrent state
+                             PartialShape{{1, 4}, -1, {2, 8}},             // gate
+                             PartialShape{{1, 4}, -1, {2, 8}});            // beta
+
+    EXPECT_EQ(op->get_output_element_type(0), element::bf16);
+    EXPECT_EQ(op->get_output_element_type(1), element::bf16);
+    EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape{{1, 4}, -1, {2, 8}, {32, 128}}));  // equals value shape
+    EXPECT_EQ(op->get_output_partial_shape(1), (PartialShape{{1, 4}, {2, 8}, 64, {32, 128}}));  // equals state shape
+}
+
+TEST(type_prop, gated_delta_net_invalid_query_rank) {  // a rank-3 query must be rejected
+    OV_EXPECT_THROW(std::ignore = make_gdn(element::f32,
+                                           Shape{2, 5, 8},      // query: rank 3 instead of the required 4
+                                           Shape{2, 5, 4, 8},   // key
+                                           Shape{2, 5, 4, 16},  // value
+                                           Shape{2, 4, 8, 16},  // recurrent state
+                                           Shape{2, 5, 4},      // gate
+                                           Shape{2, 5, 4}),     // beta
+                    NodeValidationFailure,
+                    HasSubstr("Rank of `query` input should be in [4] list"));
+}
+
+TEST(type_prop, gated_delta_net_invalid_gate_rank) {  // a rank-4 gate must be rejected
+    OV_EXPECT_THROW(std::ignore = make_gdn(element::f32,
+                                           Shape{2, 5, 4, 8},   // query
+                                           Shape{2, 5, 4, 8},   // key
+                                           Shape{2, 5, 4, 16},  // value
+                                           Shape{2, 4, 8, 16},  // recurrent state
+                                           Shape{2, 5, 4, 1},   // gate: rank 4 instead of the required 3
+                                           Shape{2, 5, 4}),     // beta
+                    NodeValidationFailure,
+                    HasSubstr("Rank of `gate` input should be in [3] list"));
+}
+
+TEST(type_prop, gated_delta_net_invalid_type) {  // i32 inputs must be rejected; the message names `query`
+    OV_EXPECT_THROW(std::ignore = make_gdn(element::i32,        // unsupported element type under test
+                                           Shape{2, 5, 4, 8},   // query
+                                           Shape{2, 5, 4, 8},   // key
+                                           Shape{2, 5, 4, 16},  // value
+                                           Shape{2, 4, 8, 16},  // recurrent state
+                                           Shape{2, 5, 4},      // gate
+                                           Shape{2, 5, 4}),     // beta
+                    NodeValidationFailure,
+                    HasSubstr("Element type of `query` input should be in"));
+}
+
+TEST(type_prop, gated_delta_net_head_num_mismatch_qkv) {  // key head count differs from query/value
+    OV_EXPECT_THROW(std::ignore = make_gdn(element::f32,
+                                           Shape{2, 5, 4, 8},   // query
+                                           Shape{2, 5, 6, 8},   // key: dim[2] is 6 while query and value use 4
+                                           Shape{2, 5, 4, 16},  // value
+                                           Shape{2, 4, 8, 16},  // recurrent state
+                                           Shape{2, 5, 4},      // gate
+                                           Shape{2, 5, 4}),     // beta
+                    NodeValidationFailure,
+                    HasSubstr("The number of heads in query key and value should be the same"));
+}
+
+TEST(type_prop, gated_delta_net_head_size_mismatch_qk) {  // key head size differs from query head size
+    OV_EXPECT_THROW(std::ignore = make_gdn(element::f32,
+                                           Shape{2, 5, 4, 8},    // query: last dim 8
+                                           Shape{2, 5, 4, 32},   // key: last dim 32, mismatching query
+                                           Shape{2, 5, 4, 16},   // value
+                                           Shape{2, 4, 32, 16},  // recurrent state: dim[2] is 32, same as key
+                                           Shape{2, 5, 4},       // gate
+                                           Shape{2, 5, 4}),      // beta
+                    NodeValidationFailure,
+                    HasSubstr("The head size in key and query should be the same"));
+}
+
+TEST(type_prop, gated_delta_net_gate_beta_head_num_mismatch) {  // gate head count differs from beta
+    OV_EXPECT_THROW(std::ignore = make_gdn(element::f32,
+                                           Shape{2, 5, 4, 8},   // query
+                                           Shape{2, 5, 4, 8},   // key
+                                           Shape{2, 5, 4, 16},  // value
+                                           Shape{2, 4, 8, 16},  // recurrent state
+                                           Shape{2, 5, 6},      // gate: last dim 6 while beta uses 4
+                                           Shape{2, 5, 4}),     // beta
+                    NodeValidationFailure,
+                    HasSubstr("The number of heads in gate, beta, and query should be the same"));
+}
+
+TEST(type_prop, gated_delta_net_state_shape_mismatch) {  // state last dim differs from value last dim
+    OV_EXPECT_THROW(std::ignore = make_gdn(element::f32,
+                                           Shape{2, 5, 4, 8},   // query
+                                           Shape{2, 5, 4, 8},   // key
+                                           Shape{2, 5, 4, 16},  // value: last dim 16
+                                           Shape{2, 4, 8, 32},  // recurrent state: last dim 32, mismatching value
+                                           Shape{2, 5, 4},      // gate
+                                           Shape{2, 5, 4}),     // beta
+                    NodeValidationFailure,
+                    HasSubstr("The [-1] dim in shape of recurrent_state and value should be the same"));
+}