From b539b481f23d39fc857551763b4779eebeaf368f Mon Sep 17 00:00:00 2001
From: Vasily Shamporov <vasily.shamporov@intel.com>
Date: Mon, 6 Oct 2025 14:52:39 +0200
Subject: [PATCH 1/8] Add Adaptive R-KV reference op implementation

---
 .../reference/adaptive_rkv_diversity.hpp      | 204 ++++++++++++++++++
 1 file changed, 204 insertions(+)
 create mode 100644 src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp
diff --git a/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp b/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp
new file mode 100644
index 00000000000000..e8065409da203f
--- /dev/null
+++ b/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp
@@ -0,0 +1,204 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <cmath>
+#include <cstddef>
+#include <memory>
+#include <queue>
+
+#include "openvino/reference/matmul.hpp"
+#include "openvino/reference/normalize_l2.hpp"
+#include "openvino/reference/reduce_mean.hpp"
+#include "openvino/reference/slice.hpp"
+#include "openvino/runtime/tensor.hpp"
+
+namespace ov::reference {
+
+
+/** @brief Reference implementation of the XAttention sparse attention prefill mechanism
+ * (https://arxiv.org/abs/2503.16428) */
+template <typename T>
+class AdaptiveRKVDiversityCalculator {
+public:
+    /** @param threshold Defines a threshold for introduced block sparsity - XAttention attempts to preserve the
+     * smallest subset of attention score matrix blocks so that the ratio of the attention score sum to the total sum of
+     * attention score matrix elements is no less than `threshold`. In other words, `threshold` defines a fraction of
+     * the attention score mass which is to be preserved by most "important" blocks. Valid range is 0.0-1.0, with 0.0
+     * corresponding to 0% of the blocks retained, and 1.0 corresponding to 100% of the blocks retained.
+     * @param block_size The size of blocks into which the attention score matrix [num_heads, query_token_dimension,
+     * key_token_dimension] will be subdivided for purposes of determining the subset of the most important blocks
+     * according to `threshold`. This subdivision occurs on query and key dimensions of the attention score matrix with
+     * the same granularity, i.e. the resulting blocks have equal size on both dimensions. Essentially `block_size`
+     * defines the granularity of the eventual sparse attention computations. Must be a multiple of `stride`.
+     * @param stride The stride at which the full attention matrix is subsampled in a block-antidiagonal fashion to
+     * estimate the block importance. Note that the full attention matrix is not computed, instead the original query
+     * and key matrices are reshaped appropriately so that only the necessary elements are computed. Ideally, the
+     * computational complexity of the entire block estimation operation is `stride` times lower than the full attention
+     * matrix computation.
+     * */
+    AdaptiveRKVDiversityCalculator(size_t start_size, size_t eviction_size, size_t block_size)
+        : m_start_size(start_size),
+          m_eviction_size(eviction_size),
+          m_block_size(block_size) {
+        OPENVINO_ASSERT(block_size != 0 && start_size % block_size == 0);  // zero guard first: `% 0` is undefined
+        OPENVINO_ASSERT(eviction_size % block_size == 0);  // both areas must span a whole number of blocks
+    }
+
+    /** Divides the input rank-3 tensor into blocks along last two dimensions, performs the addition of the values
+     * inside each block and outputs each block sum into corresponding positions in the output tensor downsampled along
+     * the same dimensions. The output tensor dimensions are such that the query and key token dimensions are
+     * downsampled by `block_size` when compared to the *original* query and key tensors.
+     * @param attention_scores_data Pointer to the attention score input.
+     * @param attention_score_shape Shape of the attention score input tensor. Expected shape is [num_heads,
+     * num_query_tokens / stride, num_key_tokens / stride], where `num_query_tokens` and `num_key_tokens` must be
+     * multiples of `block_size`.
+     * @param out Pointer to the output tensor data (block sums)
+     * @param out_shape Shape of the output tensor data. Expected shape is [num_heads, num_query_tokens / block_size,
+     * num_key_tokens / block_size].
+     */
+    void fill_diagonal_(const T* in_out,
+                        const Shape& in_out_shape,
+                        T val) {
+        OPENVINO_ASSERT(in_out_shape.size() == 3);  // [num_heads, token_dim, token_dim]
+        OPENVINO_ASSERT(in_out_shape[1] == in_out_shape[2]);  // each per-head matrix must be square
+        // Overwrites element [h, i, i] of every head h with `val`; all other elements are left untouched.
+
+        for (size_t head_idx = 0; head_idx < in_out_shape[0]; head_idx++) {
+            size_t in_head_offset = head_idx * in_out_shape[1] * in_out_shape[2];  // start of this head's matrix
+            for (size_t token_dim_idx = 0; token_dim_idx < in_out_shape[1]; token_dim_idx++) {
+                size_t diagonal_element_offset = token_dim_idx + token_dim_idx * in_out_shape[1];  // column i of row i
+                auto diagonal_element_ptr = in_out + in_head_offset + diagonal_element_offset;
+                *diagonal_element_ptr = val;  // the data is modified in place through `in_out`
+            }
+        }
+    }
+
+    void fill_low_values_with_zeros_(const T* in_out,
+                                     const Shape& in_out_shape,
+                                     const T* means,
+                                     const Shape& means_shape) {
+        OPENVINO_ASSERT(in_out_shape.size() == 3);  // [num_heads, token_dim, token_dim]
+        OPENVINO_ASSERT(in_out_shape[1] == in_out_shape[2]);  // per-head matrices are square
+        OPENVINO_ASSERT(means_shape.size() == 2);   // [num_heads, token_dim]
+        OPENVINO_ASSERT(means_shape[0] == in_out_shape[0]);  // same number of heads
+        OPENVINO_ASSERT(means_shape[1] == in_out_shape[1]);  // one threshold per row of each head
+        // Per (head, row): every element below that row's entry in `means` becomes 0, the rest is kept as is.
+        for (size_t head_idx = 0; head_idx < in_out_shape[0]; head_idx++) {
+            size_t in_head_offset = head_idx * in_out_shape[1] * in_out_shape[2];
+            size_t means_head_offset = head_idx * means_shape[1];
+            for (size_t token_dim_idx = 0; token_dim_idx < in_out_shape[1]; token_dim_idx++) {
+                T mean_val = means[means_head_offset + token_dim_idx];  // threshold shared by the whole row
+                size_t token_offset = token_dim_idx * in_out_shape[2];
+                for (size_t reduced_dim_idx = 0; reduced_dim_idx < in_out_shape[2]; reduced_dim_idx++) {
+                    size_t target_offset = in_head_offset + token_offset + reduced_dim_idx;
+                    T filled_val = in_out[target_offset];
+                    in_out[target_offset] = filled_val >= mean_val ? filled_val : 0.0;  // ties (== mean) are kept
+                }
+            }
+        }
+    }
+
+    void block_sum_diversity_values(const T* processed_similarity_token_data,
+                                    const Shape& processed_similarity_token_data_shape,
+                                    T* out,
+                                    const Shape& out_shape) {
+        OPENVINO_ASSERT(processed_similarity_token_data_shape.size() == 2);  // [token_dim, token_dim]
+        OPENVINO_ASSERT(processed_similarity_token_data_shape[0] == processed_similarity_token_data_shape[1]);
+        OPENVINO_ASSERT(processed_similarity_token_data_shape[0] % m_block_size == 0);  // rows split evenly into blocks
+        // Collapses each group of `m_block_size` consecutive input rows into one output row, column by column.
+        OPENVINO_ASSERT(out_shape.size() == 2);  // [block_dim, token_dim]
+        OPENVINO_ASSERT(out_shape[0] == processed_similarity_token_data_shape[0] / m_block_size);
+        OPENVINO_ASSERT(out_shape[1] == processed_similarity_token_data_shape[1]);  // columns are not reduced
+
+        std::memset(out, 0, out_shape[0] * out_shape[1] * sizeof(T));  // every accumulator starts from all-zero bytes
+
+        for (size_t out_block_dim_idx = 0; out_block_dim_idx < out_shape[0]; out_block_dim_idx++) {
+            size_t out_block_offset = out_block_dim_idx * out_shape[1];  // start of this block's output row
+            for (size_t out_token_dim_idx = 0; out_token_dim_idx < out_shape[1]; out_token_dim_idx++) {
+               size_t in_block_offset = (out_block_dim_idx * m_block_size) * out_shape[1];
+               for (size_t in_token_in_block_idx = 0; in_token_in_block_idx < m_block_size; in_token_in_block_idx++) {
+                  size_t source_offset = in_block_offset + in_token_in_block_idx * processed_similarity_token_data_shape[1] + out_token_dim_idx;
+                  out[out_block_offset + out_token_dim_idx] += processed_similarity_token_data[source_offset];
+               }
+            }
+        }
+    }
+
+    /** Applies XAttention to the provided query and key matrices, returning the subset of the most important blocks for
+     * each attention head, according to the configured block size and threshold, which are to be preserved in the
+     * subsequent sparse attention computation.
+     * @param query_data Pointer to the query input tensor data
+     * @param query_shape Shape of the query input tensor data. Expected shape is [num_heads, num_query_tokens,
+     * head_size], where `num_query_tokens` must be a multiple of both `block_size` and `stride`, padded with zeroes if
+     * necessary to do so in the real-world scenario.
+     * @param key_data Pointer to the key input tensor data
+     * @param key_shape Shape of the key input tensor data. Expected shape is [num_heads, num_key_tokens, head_size],
+     * where `num_key_tokens` must be a multiple of both `block_size` and `stride`, padded with zeroes if necessary to
+     * do so in the real-world scenario.
+     * @return A vector of size `num_heads` of sets, each set containing pairs of block indices (.first is the block
+     * index along the query dimension, .second - along the key). Each set is the head-specific subset of blocks that
+     * must be preserved in the sparse attention computation. Indices are given in units of XAttention-specific
+     * `block_size` (as configured), which may differ from the block size in the paged attention implementation.
+     */
+    std::vector<std::vector<T>> calculate_block_diversity(const T* key_data,
+                                                  const Shape& key_shape) {
+        OPENVINO_ASSERT(key_shape.size() == 3);    // [num_heads, key_token_len, head_dim]
+        OPENVINO_ASSERT(key_shape[1] >= m_block_size * (m_start_size + m_eviction_size));
+
+        // Should be safe to use this in-place
+        ov::reference::normalize_l2(key_data, key_data, key_shape, {2}, std::numeric_limits<T>::epsilon());
+
+        Shape cos_similar_shape = {key_shape[0], key_shape[1], key_shape[1]};
+        auto cos_similar_buf = allocate_buf(cos_similar_shape);
+        ov::reference::matmul(key_data, key_data, cos_similar_buf.get(), key_shape, key_shape, cos_similar_shape, /* transpose_arg0 = */ false, /* transpose_arg1 = */ true);
+
+        Shape evictable_subset_shape = {key_shape[0], m_eviction_size, m_eviction_size};
+        auto evictable_subset_buf = allocate_buf(evictable_subset_shape);
+        // stops?
+        ov::reference::slice(cos_similar_buf.get(), cos_similar_shape, evictable_subset_buf.get(), evictable_subset_shape, sizeof(T), /* starts = */ {m_start_size, m_start_size}, /* steps = */ {1, 1}, /* axes = */{1, 2});
+        cos_similar_buf.reset();
+
+        fill_diagonal_(evictable_subset_buf.get(), evictable_subset_shape, 0.0);
+
+        Shape means_shape = {key_shape[0], m_eviction_size};  // one mean per (head, row) of the evictable subset
+        auto means_buf = allocate_buf(means_shape);
+        ov::reference::reduce_mean(evictable_subset_buf.get(), means_buf.get(), evictable_subset_shape, {2});
+
+        fill_low_values_with_zeros_(evictable_subset_buf.get(), evictable_subset_shape, means_buf.get(), means_shape);
+
+        Shape aggregated_token_similarities_shape = {m_eviction_size, m_eviction_size};
+        auto aggregated_token_similarities_buf = allocate_buf(aggregated_token_similarities_shape);
+        ov::reference::reduce_mean(evictable_subset_buf.get(), aggregated_token_similarities_buf.get(), evictable_subset_shape, {0});  // axis 0 is the head dimension
+        evictable_subset_buf.reset();
+
+        Shape block_diversity_shape = {m_eviction_size / m_block_size, m_eviction_size};  // [blocks, tokens]
+        auto block_diversity_buf = allocate_buf(block_diversity_shape);
+        block_sum_diversity_values(aggregated_token_similarities_buf.get(), aggregated_token_similarities_shape, block_diversity_buf.get(), block_diversity_shape);
+        std::vector<std::vector<T>> retval(block_diversity_shape[0], std::vector<T>(block_diversity_shape[1]));
+        for (size_t block_idx = 0; block_idx < block_diversity_shape[0]; block_idx++) {
+            for (size_t token_idx = 0; token_idx < block_diversity_shape[1]; token_idx++) {
+                retval[block_idx][token_idx] = block_diversity_buf.get() + block_idx * block_diversity_shape[1] + token_idx;
+            }
+        }
+
+        return retval;  // one inner vector per block, each with m_eviction_size entries
+    }
+
+    /** @brief Allocates a heap array holding one `T` per element of a tensor with the given shape.
+     * @param shape Shape of a tensor
+     * @return A shared_ptr owning a buffer that can be used to store tensor data for the given shape. The elements are
+     * not explicitly initialized (plain `new T[n]`), so callers must write them before reading. */
+    std::shared_ptr<T[]> allocate_buf(const Shape& shape) {
+        return std::shared_ptr<T[]>(new T[ov::shape_size(shape)]);  // element count is the product of all dims
+    }
+
+
+    size_t m_start_size;
+    size_t m_eviction_size;
+    size_t m_block_size;
+};
+
+}  // namespace ov::reference

From 5a71b4e93daa2db5129a66750008462eed3ca7f1 Mon Sep 17 00:00:00 2001
From: Vasily Shamporov <vasily.shamporov@intel.com>
Date: Tue, 7 Oct 2025 13:32:03 +0200
Subject: [PATCH 2/8] Add basic tests

---
 .../reference/adaptive_rkv_diversity.hpp      |  21 +-
 .../reference/adaptive_rkv_diversity.cpp      | 443 ++++++++++++++++++
 2 files changed, 456 insertions(+), 8 deletions(-)
 create mode 100644 src/core/tests/reference/adaptive_rkv_diversity.cpp

diff --git a/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp b/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp
index e8065409da203f..b3f4f755266279 100644
--- a/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp
+++ b/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp
@@ -9,6 +9,7 @@
 #include <memory>
 #include <queue>
 
+#include "openvino/op/util/attr_types.hpp"
 #include "openvino/reference/matmul.hpp"
 #include "openvino/reference/normalize_l2.hpp"
 #include "openvino/reference/reduce_mean.hpp"
@@ -59,7 +60,7 @@ class AdaptiveRKVDiversityCalculator {
      * @param out_shape Shape of the output tensor data. Expected shape is [num_heads, num_query_tokens / block_size,
      * num_key_tokens / block_size].
      */
-    void fill_diagonal_(const T* in_out,
+    void fill_diagonal_(T* in_out,
                         const Shape& in_out_shape,
                         T val) {
         OPENVINO_ASSERT(in_out_shape.size() == 3);  // [num_heads, token_dim, token_dim]
@@ -76,7 +77,7 @@ class AdaptiveRKVDiversityCalculator {
         }
     }
 
-    void fill_low_values_with_zeros_(const T* in_out,
+    void fill_low_values_with_zeros_(T* in_out,
                                      const Shape& in_out_shape,
                                      const T* means,
                                      const Shape& means_shape) {
@@ -121,7 +122,7 @@ class AdaptiveRKVDiversityCalculator {
                size_t in_block_offset = (out_block_dim_idx * m_block_size) * out_shape[1];
                for (size_t in_token_in_block_idx = 0; in_token_in_block_idx < m_block_size; in_token_in_block_idx++) {
                   size_t source_offset = in_block_offset + in_token_in_block_idx * processed_similarity_token_data_shape[1] + out_token_dim_idx;
-                  out[out_block_offset + out_token_dim_idx] += processed_similarity_token_data[source_offset];
+                  out[out_block_offset + out_token_dim_idx] -= processed_similarity_token_data[source_offset];
                }
             }
         }
@@ -146,19 +147,22 @@ class AdaptiveRKVDiversityCalculator {
     std::vector<std::vector<T>> calculate_block_diversity(const T* key_data,
                                                   const Shape& key_shape) {
         OPENVINO_ASSERT(key_shape.size() == 3);    // [num_heads, key_token_len, head_dim]
-        OPENVINO_ASSERT(key_shape[1] >= m_block_size * (m_start_size + m_eviction_size));
+        OPENVINO_ASSERT(key_shape[1] >= m_start_size + m_eviction_size);
 
+
+        auto normalized_key_data_buf = allocate_buf(key_shape);
         // Should be safe to use this in-place
-        ov::reference::normalize_l2(key_data, key_data, key_shape, {2}, std::numeric_limits<T>::epsilon());
+        ov::reference::normalize_l2(key_data, normalized_key_data_buf.get(), key_shape, {2}, std::numeric_limits<float>::epsilon(), ov::op::EpsMode::ADD);
 
         Shape cos_similar_shape = {key_shape[0], key_shape[1], key_shape[1]};
         auto cos_similar_buf = allocate_buf(cos_similar_shape);
-        ov::reference::matmul(key_data, key_data, cos_similar_buf.get(), key_shape, key_shape, cos_similar_shape, /* transpose_arg0 = */ false, /* transpose_arg1 = */ true);
+        ov::reference::matmul(normalized_key_data_buf.get(), normalized_key_data_buf.get(), cos_similar_buf.get(), key_shape, key_shape, cos_similar_shape, /* transpose_arg0 = */ false, /* transpose_arg1 = */ true);
+        normalized_key_data_buf.reset();
 
         Shape evictable_subset_shape = {key_shape[0], m_eviction_size, m_eviction_size};
         auto evictable_subset_buf = allocate_buf(evictable_subset_shape);
         // stops?
-        ov::reference::slice(cos_similar_buf.get(), cos_similar_shape, evictable_subset_buf.get(), evictable_subset_shape, sizeof(T), /* starts = */ {m_start_size, m_start_size}, /* steps = */ {1, 1}, /* axes = */{1, 2});
+        ov::reference::slice(reinterpret_cast<char*>(cos_similar_buf.get()), cos_similar_shape, reinterpret_cast<char*>(evictable_subset_buf.get()), evictable_subset_shape, sizeof(T), /* starts = */ {m_start_size, m_start_size}, /* steps = */ {1, 1}, /* axes = */{1, 2});
         cos_similar_buf.reset();
 
         fill_diagonal_(evictable_subset_buf.get(), evictable_subset_shape, 0.0);
@@ -168,6 +172,7 @@ class AdaptiveRKVDiversityCalculator {
         ov::reference::reduce_mean(evictable_subset_buf.get(), means_buf.get(), evictable_subset_shape, {2});
 
         fill_low_values_with_zeros_(evictable_subset_buf.get(), evictable_subset_shape, means_buf.get(), means_shape);
+        means_buf.reset();
 
         Shape aggregated_token_similarities_shape = {m_eviction_size, m_eviction_size};
         auto aggregated_token_similarities_buf = allocate_buf(aggregated_token_similarities_shape);
@@ -180,7 +185,7 @@ class AdaptiveRKVDiversityCalculator {
         std::vector<std::vector<T>> retval(block_diversity_shape[0], std::vector<T>(block_diversity_shape[1]));
         for (size_t block_idx = 0; block_idx < block_diversity_shape[0]; block_idx++) {
             for (size_t token_idx = 0; token_idx < block_diversity_shape[1]; token_idx++) {
-                retval[block_idx][token_idx] = block_diversity_buf.get() + block_idx * block_diversity_shape[1] + token_idx;
+                retval[block_idx][token_idx] = block_diversity_buf[block_idx * block_diversity_shape[1] + token_idx];
             }
         }
 
diff --git a/src/core/tests/reference/adaptive_rkv_diversity.cpp b/src/core/tests/reference/adaptive_rkv_diversity.cpp
new file mode 100644
index 00000000000000..6595162e1aa3ec
--- /dev/null
+++ b/src/core/tests/reference/adaptive_rkv_diversity.cpp
@@ -0,0 +1,443 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include <openvino/reference/adaptive_rkv_diversity.hpp>
+
+namespace adaptive_rkv_test {
+constexpr size_t DEFAULT_BLOCK_SIZE = 2;      // block_size argument of the calculators built in the tests below
+constexpr size_t DEFAULT_START_SIZE = 2;      // start_size argument; a multiple of the block size, as the ctor asserts
+constexpr size_t DEFAULT_EVICTION_SIZE = 10;  // eviction_size argument; also a multiple of the block size
+
+
+TEST(AdaptiveRKVE2ESmokeTest, CalculatesDiversityWithoutThrowing) {  // smoke test: only checks that nothing throws
+    ov::reference::AdaptiveRKVDiversityCalculator<double> calculator(DEFAULT_START_SIZE,
+                                                                     DEFAULT_EVICTION_SIZE,
+                                                                     DEFAULT_BLOCK_SIZE);
+    // Key tensor of ones; the token dimension is twice (start + eviction) so the size precondition holds.
+    ov::Shape mock_shape{2, (DEFAULT_START_SIZE + DEFAULT_EVICTION_SIZE) * 2, 8};
+    std::vector<double> mock_data(ov::shape_size(mock_shape), 1.0);
+
+    EXPECT_NO_THROW(calculator.calculate_block_diversity(mock_data.data(), mock_shape));
+}
+
+
+struct FillDiagonalTestData {  // one parametrized case for the fill_diagonal_ test
+    ov::Shape in_shape;                // shape handed to fill_diagonal_ together with the data
+    std::vector<double> in_data;       // flat input values; the test copies them before the in-place call
+    std::vector<double> ref_out_data;  // expected buffer contents after the call, compared with EXPECT_EQ
+};
+
+using AdaptiveRKVDiversityFillDiagonalTest = ::testing::TestWithParam<FillDiagonalTestData>;
+
+std::vector<FillDiagonalTestData> FILL_DIAGONAL_TEST_CASES = {  // inputs for the FillsDiagonal parametrized test
+    {
+        {2, 4, 4},  // in_shape: two 4x4 matrices
+        // clang-format off
+        {  // in_data
+             3.144,  8.512,  8.518, -8.386,
+             7.889, -5.721,  5.507,  4.295,
+            -6.624, -8.463,  7.474,  9.879,
+             4.534, -5.908, -9.388,  2.356,
+
+             7.497,  8.186, -8.658, -4.796,
+            -8.248, -9.797, -7.907, -4.513,
+             3.469,  7.633,  7.244, -6.844,
+            -7.173,  4.450,  6.705, -7.035
+        },
+        // clang-format on
+
+        // clang-format off
+        {  // ref_out_data: in_data with the fill value 42.00 on the diagonal of each matrix
+             42.00,  8.512,  8.518, -8.386,
+             7.889,  42.00,  5.507,  4.295,
+            -6.624, -8.463,  42.00,  9.879,
+             4.534, -5.908, -9.388,  42.00,
+
+             42.00,  8.186, -8.658, -4.796,
+            -8.248,  42.00, -7.907, -4.513,
+             3.469,  7.633,  42.00, -6.844,
+            -7.173,  4.450,  6.705,  42.00
+        },
+        // clang-format on
+    }
+};
+
+TEST_P(AdaptiveRKVDiversityFillDiagonalTest, FillsDiagonal) {
+    const auto params = GetParam();
+    const size_t expected_element_count = ov::shape_size(params.in_shape);  // guards against malformed test data
+    ASSERT_EQ(params.in_data.size(), expected_element_count);
+    ASSERT_EQ(params.ref_out_data.size(), expected_element_count);
+
+    ov::reference::AdaptiveRKVDiversityCalculator<double> diversity_calculator(DEFAULT_START_SIZE,
+                                                                               DEFAULT_EVICTION_SIZE,
+                                                                               DEFAULT_BLOCK_SIZE);
+    std::vector<double> buffer(params.in_data);  // the diagonal is filled in place, so operate on a copy
+    diversity_calculator.fill_diagonal_(buffer.data(), params.in_shape, 42.0);
+    EXPECT_EQ(buffer, params.ref_out_data);
+}
+
+INSTANTIATE_TEST_SUITE_P(VariousInputs,
+                         AdaptiveRKVDiversityFillDiagonalTest,
+                         ::testing::ValuesIn(FILL_DIAGONAL_TEST_CASES));
+
+struct FillLowValuesWithZerosTestData {  // one parametrized case for the fill_low_values_with_zeros_ test
+    ov::Shape in_shape;                // shape of the tensor that is filtered in place
+    std::vector<double> in_data;       // flat input values
+    ov::Shape means_shape;             // shape of the per-row threshold tensor
+    std::vector<double> means;         // thresholds handed to the call as `means`
+    std::vector<double> ref_out_data;  // expected contents after the call, compared with a 1e-8 tolerance
+};
+
+using AdaptiveRKVFillLowValuesWithZerosTest = ::testing::TestWithParam<FillLowValuesWithZerosTestData>;
+
+std::vector<FillLowValuesWithZerosTestData> FILL_LOW_VALUES_WITH_ZEROS_TEST_CASES = {
+    {
+        {2, 4, 4},  // in_shape: two 4x4 matrices
+        // clang-format off
+        {  // in_data
+             4.534, -5.908, -9.388,  2.356,
+            -6.624, -8.463,  7.474,  9.879,
+             7.889, -5.721,  5.507,  4.295,
+             3.144,  8.512,  8.518, -8.386,
+
+            -7.173,  4.450,  6.705, -7.035,
+             3.469,  7.633,  7.244, -6.844,
+            -8.248, -9.797, -7.907, -4.513,
+             7.497,  8.186, -8.658, -4.796,
+        },
+        // clang-format on
+
+        {2, 4},  // means_shape: one threshold per input row
+
+        // clang-format off
+        {  // means: the arithmetic mean of the corresponding input row
+            -2.1015,
+            0.5665,
+            2.9925,
+            2.947,
+
+            -0.76325,
+            2.8755,
+            -7.61625,
+            0.55725
+        },
+
+        // clang-format off
+        {  // ref_out_data: in_data with every element below its row's mean replaced by zero
+             4.534,  0.000,  0.000,  2.356,
+             0.000,  0.000,  7.474,  9.879,
+             7.889,  0.000,  5.507,  4.295,
+             3.144,  8.512,  8.518,  0.000,
+
+             0.000,  4.450,  6.705,  0.000,
+             3.469,  7.633,  7.244,  0.000,
+             0.000,  0.000,  0.000, -4.513,
+             7.497,  8.186,  0.000,  0.000,
+        },
+        // clang-format on
+    },
+};
+
+TEST_P(AdaptiveRKVFillLowValuesWithZerosTest, FillsLowValuesWithZero) {
+    const auto params = GetParam();
+    const size_t in_element_count = ov::shape_size(params.in_shape);  // input and reference share this size
+    ASSERT_EQ(params.in_data.size(), in_element_count);
+    ASSERT_EQ(params.means.size(), ov::shape_size(params.means_shape));
+    ASSERT_EQ(params.ref_out_data.size(), in_element_count);
+
+    ov::reference::AdaptiveRKVDiversityCalculator<double> diversity_calculator(DEFAULT_START_SIZE, DEFAULT_EVICTION_SIZE, DEFAULT_BLOCK_SIZE);
+    std::vector<double> buffer(params.in_data);  // the filtering happens in place, so operate on a copy
+    diversity_calculator.fill_low_values_with_zeros_(buffer.data(), params.in_shape,
+                                                     params.means.data(), params.means_shape);
+
+    EXPECT_THAT(buffer, ::testing::Pointwise(::testing::DoubleNear(1e-8), params.ref_out_data));
+}
+
+INSTANTIATE_TEST_SUITE_P(VariousInputs,
+                         AdaptiveRKVFillLowValuesWithZerosTest,
+                         ::testing::ValuesIn(FILL_LOW_VALUES_WITH_ZEROS_TEST_CASES));
+
+
+struct BlockSumTestData {  // one parametrized case for the block_sum_diversity_values test
+    ov::Shape in_shape;                // shape of the square input matrix
+    std::vector<double> in_data;       // flat input values
+    size_t block_size;                 // block size the calculator is constructed with for this case
+    ov::Shape out_shape;               // shape of the block-reduced output
+    std::vector<double> ref_out_data;  // expected output, compared with a 1e-5 tolerance
+};
+
+using AdaptiveRKVBlockSumTest = ::testing::TestWithParam<BlockSumTestData>;
+
+std::vector<BlockSumTestData> BLOCK_SUM_TEST_CASES = {  // inputs for the BlockSumIsCorrect parametrized test
+    {
+        {8, 8},  // in_shape
+        // clang-format off
+        {  // in_data
+            0.1117, 0.0780, 0.1347, 0.0885, 0.1942, 0.0922, 0.1184, 0.1824,
+            0.1488, 0.1766, 0.0852, 0.1239, 0.0930, 0.1220, 0.1367, 0.1138,
+            0.1410, 0.0861, 0.0774, 0.1325, 0.1478, 0.1689, 0.0885, 0.1579,
+            0.1248, 0.1038, 0.1842, 0.0935, 0.1813, 0.0890, 0.0897, 0.1336,
+            0.0905, 0.1049, 0.1263, 0.0953, 0.1018, 0.1297, 0.1659, 0.1855,
+            0.1373, 0.1791, 0.1005, 0.1286, 0.1492, 0.1373, 0.0820, 0.0860,
+            0.0997, 0.1285, 0.0786, 0.1366, 0.1963, 0.0904, 0.1488, 0.1211,
+            0.1859, 0.1174, 0.1364, 0.0930, 0.1028, 0.1034, 0.1699, 0.0912
+        },
+        // clang-format on
+
+        /* block_size = */ 2,
+
+        {4, 8},  // out_shape: 8 input rows / block_size 2 = 4 output rows, columns unchanged
+
+        // clang-format off
+        {  // ref_out_data: negated column-wise sum of each pair of consecutive input rows
+            -0.2605, -0.2546, -0.2199, -0.2124, -0.2872, -0.2142, -0.2551, -0.2962,
+            -0.2658, -0.1899, -0.2616, -0.226,  -0.3291, -0.2579, -0.1782, -0.2915,
+            -0.2278, -0.284 , -0.2268, -0.2239, -0.251,  -0.267,  -0.2479, -0.2715,
+            -0.2856, -0.2459, -0.215,  -0.2296, -0.2991, -0.1938, -0.3187, -0.2123
+
+        },
+    },
+};
+
+TEST_P(AdaptiveRKVBlockSumTest, BlockSumIsCorrect) {
+    const auto params = GetParam();
+    ASSERT_EQ(params.in_data.size(), ov::shape_size(params.in_shape));
+    const size_t out_element_count = params.ref_out_data.size();
+    ASSERT_EQ(out_element_count, ov::shape_size(params.out_shape));
+
+    // The block size comes from the test case; start and eviction sizes are the file-wide defaults.
+    ov::reference::AdaptiveRKVDiversityCalculator<double> diversity_calculator(DEFAULT_START_SIZE, DEFAULT_EVICTION_SIZE, params.block_size);
+    std::vector<double> actual_out(out_element_count);
+    diversity_calculator.block_sum_diversity_values(params.in_data.data(), params.in_shape,
+                                                    actual_out.data(), params.out_shape);
+    EXPECT_THAT(actual_out, ::testing::Pointwise(::testing::DoubleNear(1e-5), params.ref_out_data));
+}
+
+INSTANTIATE_TEST_SUITE_P(VariousInputs,
+                         AdaptiveRKVBlockSumTest,
+                         ::testing::ValuesIn(BLOCK_SUM_TEST_CASES));
+
+struct DiversityCalculateTestData {  // NOTE(review): not referenced by any test visible in this file - confirm it is needed
+    ov::Shape in_shape;           // shape of `in_data`
+    std::vector<double> in_data;  // flat input values
+    double threshold;             // NOTE(review): the calculator under test takes no threshold - verify intent
+
+};
+
+struct E2EDiversityTestData {  // one parametrized case for the end-to-end calculate_block_diversity test
+    ov::Shape k_shape;           // key tensor shape, passed as `key_shape`
+    std::vector<double> k_data;  // flat key tensor values, passed as `key_data`
+    size_t start_size;           // constructor argument of the calculator for this case
+    size_t eviction_size;        // constructor argument of the calculator for this case
+    std::vector<std::vector<double>> ref_diversity_data;  // expected result rows, compared with a 1e-6 tolerance
+};
+
+using AdaptiveRKVE2EDiversityTest = ::testing::TestWithParam<E2EDiversityTestData>;
+
+std::vector<E2EDiversityTestData> E2E_DIVERSITY_TEST_CASES = {  // fields per case: k_shape, k_data, sizes, reference
+    // basic: a single head whose keys are all identical
+    {
+        {1, 4, 1},  // k_shape
+        // clang-format off
+        {
+           1.0,
+           1.0,
+           1.0,
+           1.0
+        },
+        /* start_size = */ 2,
+        /* eviction_size = */ 2,
+        {{-1.0, -1.0}}  // one block of two tokens
+    },
+    // larger basic: the evictable area now spans two blocks
+    {
+        {1, 6, 1},
+        // clang-format off
+        {
+           6.5,
+           -11.0,
+           1.0,
+           1.0,
+           1.0,
+           1.0,
+        },
+        /* start_size = */ 2,
+        /* eviction_size = */ 4,
+        {{-1.0, -1.0, -2.0, -2.0},
+         {-2.0, -2.0, -1.0, -1.0}}
+    },
+    // two heads basic: same reference values as the single-head "larger basic" case
+    {
+        {2, 8, 1},
+        // clang-format off
+        {
+           6.5,
+           -11.0,
+           1.0,
+           1.0,
+           1.0,
+           1.0,
+           42.0,
+           -13.7,
+
+            1337.0,
+            -1256.9,
+            -1.0,
+            -1.0,
+            -1.0,
+            -1.0,
+            0.2,
+            0.0
+        },
+        /* start_size = */ 2,
+        /* eviction_size = */ 4,
+        {{-1.0, -1.0, -2.0, -2.0},
+         {-2.0, -2.0, -1.0, -1.0}}
+    },
+    // zeroed second head (where it matters): reference values are half of the "two heads basic" ones
+    {
+        {2, 8, 1},
+        // clang-format off
+        {
+           6.5,
+           -11.0,
+           1.0,
+           1.0,
+           1.0,
+           1.0,
+           42.0,
+           -13.7,
+
+            1337.0,
+            -1256.9,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.2,
+            0.0
+        },
+        /* start_size = */ 2,
+        /* eviction_size = */ 4,
+        {{-0.5, -0.5, -1.0, -1.0},
+         {-1.0, -1.0, -0.5, -0.5}}
+    },
+    // more embedding dimensions: head_dim is 4 instead of 1
+    {
+        {2, 8, 4},
+        // clang-format off
+        {
+           6.5, 8.3, 5.1, -7.4,
+           -11.0, 1.9, 7.1, 4.8,
+           8.0, 8.0, 8.0, 8.0,
+           8.0, 8.0, 8.0, 8.0,
+           8.0, 8.0, 8.0, 8.0,
+           8.0, 8.0, 8.0, 8.0,
+           42.0, -41.7, 8.3, 1.0,
+           -13.7, 0.0, 0.0, 15.1,
+
+            1337.0, -1.9, -1.4, 475.1,
+            -1256.9, 1.0, 789.0, 1421.3,
+            -2.0, -2.0, -2.0, -2.0,
+            -2.0, -2.0, -2.0, -2.0,
+            -2.0, -2.0, -2.0, -2.0,
+            -2.0, -2.0, -2.0, -2.0,
+            0.2, -81.3, 74.3, -641.1,
+            0.0, 14.7, 98.1, -27.7
+        },
+        /* start_size = */ 2,
+        /* eviction_size = */ 4,
+        {{-1.0, -1.0, -2.0, -2.0},
+         {-2.0, -2.0, -1.0, -1.0}}
+    },
+    // orthogonal tokens: each evictable key is non-zero in a different dimension; expected diversity is all zero
+    {
+        {2, 8, 4},
+        // clang-format off
+        {
+           6.5, 8.3, 5.1, -7.4,
+           -11.0, 1.9, 7.1, 4.8,
+           8.0,    0.0,   0.0, 0.0,
+           0.0,    0.0, -18.0, 0.0,
+           0.0,    0.0,   0.0, 0.1,
+           0.0, 1288.0,   0.0, 0.0,
+           42.0, -41.7, 8.3, 1.0,
+           -13.7, 0.0, 0.0, 15.1,
+
+            1337.0, -1.9, -1.4, 475.1,
+            -1256.9, 1.0, 789.0, 1421.3,
+            0.0,   0.0,  2.0,  0.0,
+            0.0, -12.0,  0.0,  0.0,
+            12.8,  0.0,  0.0,  0.0,
+            0.0,   0.0,  0.0, 65.5,
+            0.2, -81.3, 74.3, -641.1,
+            0.0, 14.7, 98.1, -27.7
+        },
+        /* start_size = */ 2,
+        /* eviction_size = */ 4,
+        {{0.0, 0.0, 0.0, 0.0},
+         {0.0, 0.0, 0.0, 0.0}}
+    },
+    // random excel-checked golden: reference values were computed outside of this code
+    {
+        {2, 10, 4},
+        // clang-format off
+        {
+              4.949, -7.294, -6.330,  3.757,
+             -3.561,  1.029,  5.030, -9.483,
+              5.350, -2.745, -1.404, -7.788,
+             -1.086,  4.576, -8.726, -8.815,
+              3.144,  8.512,  8.518, -8.386,
+              7.889, -5.721,  5.507,  4.295,
+             -6.624, -8.463,  7.474,  9.879,
+              4.534, -5.908, -9.388,  2.356,
+              7.497,  8.186, -8.658, -4.796,
+             -8.248, -9.797, -7.907, -4.513,
+
+              3.469,  7.633,  7.244, -6.844,
+             -7.173,  4.450,  6.705, -7.035,
+              8.773, -7.571, -9.878, -9.584,
+              0.807,  8.059, -7.172,  4.303,
+             -3.323, -8.852,  1.167, -1.126,
+             -4.428,  9.678, -6.547,  0.037,
+             -8.152, -9.865,  3.694, -7.650,
+              0.359,  8.018, -7.152, -6.242,
+             -9.120, -7.228, -9.186,  3.202,
+             -9.304, -0.401, -5.287,  6.834
+        },
+    // clang-format on
+
+        /* start_size = */ 2,
+        /* eviction_size = */ 6,
+        {
+            {-0.237145, -0.237145, -0.352696, -0.487902, -0.072365, -0.707192},
+            {-0.334657, -0.505941, 0, 0.036135, -0.634881,-0.490221},
+            {-0.380811, -0.398746801, -0.432080003, -0.693021748, 0, 0.067216441}
+        },
+    }
+};
+
+TEST_P(AdaptiveRKVE2EDiversityTest, CalculatesDiversityCorrectly) {
+    auto test_struct = GetParam();
+    ov::reference::AdaptiveRKVDiversityCalculator<double> calculator(test_struct.start_size,
+                                                                     test_struct.eviction_size,
+                                                                     DEFAULT_BLOCK_SIZE);
+    // Shapes are checked first with fatal assertions, values second with non-fatal ones so every bad row is reported.
+    auto test_diversity = calculator.calculate_block_diversity(test_struct.k_data.data(), test_struct.k_shape);
+    ASSERT_EQ(test_diversity.size(), test_struct.ref_diversity_data.size());
+    for (size_t i = 0; i < test_diversity.size(); i++) {
+        ASSERT_EQ(test_diversity[i].size(), test_struct.ref_diversity_data[i].size());
+    }
+
+    for (size_t i = 0; i < test_diversity.size(); i++) {
+        EXPECT_THAT(test_diversity[i], ::testing::Pointwise(::testing::DoubleNear(1e-6), test_struct.ref_diversity_data[i]));
+    }
+
+}
+
+INSTANTIATE_TEST_SUITE_P(VariousInputs, AdaptiveRKVE2EDiversityTest, ::testing::ValuesIn(E2E_DIVERSITY_TEST_CASES));
+}

From c94e619a543ca993c517b2bc3fad89965fb9dd5f Mon Sep 17 00:00:00 2001
From: Vasily Shamporov <vasily.shamporov@intel.com>
Date: Wed, 8 Oct 2025 13:00:55 +0200
Subject: [PATCH 3/8] Add documentation

---
 .../reference/adaptive_rkv_diversity.hpp      | 101 +++++++++---------
 1 file changed, 50 insertions(+), 51 deletions(-)

diff --git a/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp b/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp
index b3f4f755266279..2fbc906473dbcd 100644
--- a/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp
+++ b/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp
@@ -19,26 +19,18 @@
 namespace ov::reference {
 
 
-/** @brief Reference implementation of the XAttention sparse attention prefill mechanism
- * (https://arxiv.org/abs/2503.16428) */
+/** @brief Reference implementation of the Adaptive R-KV token diversity calculation mechanism
+ * (https://arxiv.org/pdf/2505.24133v3) */
 template <typename T>
 class AdaptiveRKVDiversityCalculator {
 public:
-    /** @param threshold Defines a threshold for introduced block sparsity - XAttention attempts to preserve the
-     * smallest subset of attention score matrix blocks so that the ratio of the attention score sum to the total sum of
-     * attention score matrix elements is no less than `threshold`. In other words, `threshold` defines a fraction of
-     * the attention score mass which is to be preserved by most "important" blocks. Valid range is 0.0-1.0, with 0.0
-     * corresponding to 0% of the blocks retained, and 1.0 corresponding to 100% of the blocks retained.
-     * @param block_size The size of blocks into which the attention score matrix [num_heads, query_token_dimension,
-     * key_token_dimension] will be subdivided for purposes of determining the subset of the most important blocks
-     * according to `threshold`. This subdivision occurs on query and key dimensions of the attention score matrix with
-     * the same granularity, i.e. the resulting blocks have equal size on both dimensions. Essentially `block_size`
-     * defines the granularity of the eventual sparse attention computations. Must be a multiple of `stride`.
-     * @param stride The stride at which the full attention matrix is subsampled in a block-antidiagonal fashion to
-     * estimate the block importance. Note that the full attention matrix is not computed, instead the original query
-     * and key matrices are reshaped appropriately so that only the necessary elements are computed. Ideally, the
-     * computational complexity of the entire block estimation operation is `stride` times lower than the full attention
-     * matrix computation.
+    /** @param start_size Size, in tokens, of the key cache area that will be ignored for purposes of diversity
+     * calculation, starting from the beginning of the token dimension ("start area"). Must be a multiple of `block_size`.
+     * @param eviction_size Size, in tokens, from the beginning of the start area, the tokens in which will be
+     * considred for purposes of diversity calculation ("eviction area"). The rest of the tokens after the eviction area,
+     * if any, are ignored. Must be a multiple of `block_size`.
+     * @param block_size Block size of the underlying paged attention implementation. The diversity values will be sum-reduced
+     * from per-token values to per-block values based on this number of tokens in a block.
      * */
     AdaptiveRKVDiversityCalculator(size_t start_size, size_t eviction_size, size_t block_size)
         : m_start_size(start_size),
@@ -48,17 +40,11 @@ class AdaptiveRKVDiversityCalculator {
         OPENVINO_ASSERT(eviction_size % block_size == 0);
     }
 
-    /** Divides the input rank-3 tensor into blocks along last two dimensions, performs the addition of the values
-     * inside each block and outputs each block sum into corresponding positions in the output tensor downsampled along
-     * the same dimensions. The output tensor dimensions are such that the query and key token dimensions are
-     * downsampled by `block_size` when compared to the *original* query and key tensors.
-     * @param attention_scores_data Pointer to the attention score input.
-     * @param attention_score_shape Shape of the attention score input tensor. Expected shape is [num_heads,
-     * num_query_tokens / stride, num_key_tokens / stride], where `num_query_tokens` and `num_key_tokens` must be
-     * multiples of `block_size`.
-     * @param out Pointer to the output tensor data (block sums)
-     * @param out_shape Shape of the output tensor data. Expected shape is [num_heads, num_query_tokens / block_size,
-     * num_key_tokens / block_size].
+    /** Fills the diagonal of each square matrix slice (at ranks 1 and 2, zero-based) of the input rank-3 tensor with
+     * a provided value. The operation is done in-place.
+     * @param in_out Pointer to the matrix data.
+     * @param in_out_shape Shape of the matrix data. Expected shape is [num_heads, token_dim, token_dim].
+     * @param val Value to fill in the diagonal positions.
      */
     void fill_diagonal_(T* in_out,
                         const Shape& in_out_shape,
@@ -77,6 +63,12 @@ class AdaptiveRKVDiversityCalculator {
         }
     }
 
+    /** For a rank-3 tensor, zeroes out the values that are less than the mean of the values of the corresponding slice at rank 2 (zero-based). Ranks 1 and 2 of the input tensor must be equal. Mean values are computed and provided externally. The operation is done in-place.
+     * @param in_out Pointer to the tensor data.
+     * @param in_out_shape Shape of the tensor data. Expected shape is [num_heads, token_dim, token_dim].
+     * @param means Pointer to the tensor data containing the means of each slice of the `in_out` tensor along its rank 2 (zero-based).
+     * @param means_shape Shape of the means tensor. Expected shape is [num_heads, token_dim].
+     */
     void fill_low_values_with_zeros_(T* in_out,
                                      const Shape& in_out_shape,
                                      const T* means,
@@ -102,17 +94,23 @@ class AdaptiveRKVDiversityCalculator {
         }
     }
 
-    void block_sum_diversity_values(const T* processed_similarity_token_data,
-                                    const Shape& processed_similarity_token_data_shape,
+    /** For a square matrix, sums each `block_size`-sized group of matrix rows to produce a row in the output matrix.
+     * @param in_data Pointer to the matrix data.
+     * @param in_shape Shape of the matrix data. Expected shape is [token_dim, token_dim], where token_dim must be a multiple of `block_size`.
+     * @param out Pointer to the output matrix data.
+     * @param out_shape Shape of the output matrix. Expected shape is [token_dim / block_size, token_dim].
+     */
+    void block_sum_diversity_values(const T* in_data,
+                                    const Shape& in_shape,
                                     T* out,
                                     const Shape& out_shape) {
-        OPENVINO_ASSERT(processed_similarity_token_data_shape.size() == 2);  // [token_dim, token_dim]
-        OPENVINO_ASSERT(processed_similarity_token_data_shape[0] == processed_similarity_token_data_shape[1]);
-        OPENVINO_ASSERT(processed_similarity_token_data_shape[0] % m_block_size == 0);
+        OPENVINO_ASSERT(in_shape.size() == 2);  // [token_dim, token_dim]
+        OPENVINO_ASSERT(in_shape[0] == in_shape[1]);
+        OPENVINO_ASSERT(in_shape[0] % m_block_size == 0);
 
         OPENVINO_ASSERT(out_shape.size() == 2);  // [block_dim, token_dim]
-        OPENVINO_ASSERT(out_shape[0] == processed_similarity_token_data_shape[0] / m_block_size);
-        OPENVINO_ASSERT(out_shape[1] == processed_similarity_token_data_shape[1]);
+        OPENVINO_ASSERT(out_shape[0] == in_shape[0] / m_block_size);
+        OPENVINO_ASSERT(out_shape[1] == in_shape[1]);
 
         std::memset(out, 0, out_shape[0] * out_shape[1] * sizeof(T));
 
@@ -121,28 +119,29 @@ class AdaptiveRKVDiversityCalculator {
             for (size_t out_token_dim_idx = 0; out_token_dim_idx < out_shape[1]; out_token_dim_idx++) {
                size_t in_block_offset = (out_block_dim_idx * m_block_size) * out_shape[1];
                for (size_t in_token_in_block_idx = 0; in_token_in_block_idx < m_block_size; in_token_in_block_idx++) {
-                  size_t source_offset = in_block_offset + in_token_in_block_idx * processed_similarity_token_data_shape[1] + out_token_dim_idx;
-                  out[out_block_offset + out_token_dim_idx] -= processed_similarity_token_data[source_offset];
+                  size_t source_offset = in_block_offset + in_token_in_block_idx * in_shape[1] + out_token_dim_idx;
+                  out[out_block_offset + out_token_dim_idx] -= in_data[source_offset];
                }
             }
         }
     }
 
-    /** Applies XAttention to the provided query and key matrices, returning the subset of the most important blocks for
-     * each attention head, according to the configured block size and threshold, which are to be preserved in the
-     * subsequent sparse attention computation.
-     * @param query_data Pointer to the query input tensor data
-     * @param query_shape Shape of the query input tensor data. Expected shape is [num_heads, num_query_tokens,
-     * head_size], where `num_query_tokens` must be a multiple of both `block_size` and `stride`, padded with zeroes if
-     * necessary to do so in the real-world scenario.
-     * @param key_data Pointer to the key input tensor data
+    /** Calculates token diversity in the eviction area, partially aggregating the results per-block. The resulting
+     * diversity values have the shape of [num_eviction_blocks (== eviction_size / block_size), eviction_size]. Note
+     * that the 1-st rank is left unaggregated when compared to the full diversity calculation algorithm. The reason
+     * for this is as follows. The final per-block diversity value computation relies on knowing the subset of blocks
+     * in the eviction area that will be retained regardless of calculated diversity. This subset must be filtered out
+     * from the rank-1 dimension when performing reduce-mean in the original algorithm to get 1 diversity value per block
+     * in the eviction area. Due to implementation specifics the paged attention kernel does not know ahead of time which
+     * blocks will be "retained" - this information is only available on the openvino.genai level after the PA kernel has executed.
+     * Therefore the PA kernel will provide raw per-token values on the rank 1 of the returned diversity value matrix and delegatei
+     * the final reduce-mean and filtering to the openvino.genai level.
+     * @param key_data Pointer to the key cache tensor data
      * @param key_shape Shape of the key input tensor data. Expected shape is [num_heads, num_key_tokens, head_size],
-     * where `num_key_tokens` must be a multiple of both `block_size` and `stride`, padded with zeroes if necessary to
-     * do so in the real-world scenario.
-     * @return A vector of size `num_heads` of sets, each set containing pairs of block indices (.first is the block
-     * index along the query dimension, .second - along the key). Each set is the head-specific subset of blocks that
-     * must be preserved in the sparse attention computation. Indices are given in units of XAttention-specific
-     * `block_size` (as configured), which may differ from the block size in the paged attention implementation.
+     * where `num_key_tokens` must be no less than `start_size + eviction_size`.
+     * @return A rank-2 matrix in the std::vector representation with dimensions [eviction_size / block_size, eviction_size] containing
+     * the diversity values. The values are expected to be further mean-reduced along rank 1 (zero-based) at the point in time when the
+     * subset of blocks to be exclusively retained is known.
      */
     std::vector<std::vector<T>> calculate_block_diversity(const T* key_data,
                                                   const Shape& key_shape) {

From ecd02cbcbc180a1db162527abf5ba7fb41d1ecd4 Mon Sep 17 00:00:00 2001
From: Vasily Shamporov <vasily.shamporov@intel.com>
Date: Wed, 8 Oct 2025 13:01:45 +0200
Subject: [PATCH 4/8] Format

---
 .../reference/adaptive_rkv_diversity.hpp      | 109 ++++++++++--------
 .../reference/adaptive_rkv_diversity.cpp      |  51 ++++----
 2 files changed, 86 insertions(+), 74 deletions(-)

diff --git a/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp b/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp
index 2fbc906473dbcd..0da9a9bbfb6cc7 100644
--- a/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp
+++ b/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp
@@ -18,19 +18,19 @@
 
 namespace ov::reference {
 
-
 /** @brief Reference implementation of the Adaptive R-KV token diversity calculation mechanism
  * (https://arxiv.org/pdf/2505.24133v3) */
 template <typename T>
 class AdaptiveRKVDiversityCalculator {
 public:
     /** @param start_size Size, in tokens, of the key cache area that will be ignored for purposes of diversity
-     * calculation, starting from the beginning of the token dimension ("start area"). Must be a multiple of `block_size`.
+     * calculation, starting from the beginning of the token dimension ("start area"). Must be a multiple of
+     * `block_size`.
      * @param eviction_size Size, in tokens, from the beginning of the start area, the tokens in which will be
-     * considred for purposes of diversity calculation ("eviction area"). The rest of the tokens after the eviction area,
-     * if any, are ignored. Must be a multiple of `block_size`.
-     * @param block_size Block size of the underlying paged attention implementation. The diversity values will be sum-reduced
-     * from per-token values to per-block values based on this number of tokens in a block.
+     * considred for purposes of diversity calculation ("eviction area"). The rest of the tokens after the eviction
+     * area, if any, are ignored. Must be a multiple of `block_size`.
+     * @param block_size Block size of the underlying paged attention implementation. The diversity values will be
+     * sum-reduced from per-token values to per-block values based on this number of tokens in a block.
      * */
     AdaptiveRKVDiversityCalculator(size_t start_size, size_t eviction_size, size_t block_size)
         : m_start_size(start_size),
@@ -46,13 +46,10 @@ class AdaptiveRKVDiversityCalculator {
      * @param in_out_shape Shape of the matrix data. Expected shape is [num_heads, token_dim, token_dim].
      * @param val Value to fill in the diagonal positions.
      */
-    void fill_diagonal_(T* in_out,
-                        const Shape& in_out_shape,
-                        T val) {
-        OPENVINO_ASSERT(in_out_shape.size() == 3);  // [num_heads, token_dim, token_dim]
+    void fill_diagonal_(T* in_out, const Shape& in_out_shape, T val) {
+        OPENVINO_ASSERT(in_out_shape.size() == 3);            // [num_heads, token_dim, token_dim]
         OPENVINO_ASSERT(in_out_shape[1] == in_out_shape[2]);  // [num_heads, token_dim, token_dim]
 
-
         for (size_t head_idx = 0; head_idx < in_out_shape[0]; head_idx++) {
             size_t in_head_offset = head_idx * in_out_shape[1] * in_out_shape[2];
             for (size_t token_dim_idx = 0; token_dim_idx < in_out_shape[1]; token_dim_idx++) {
@@ -63,19 +60,19 @@ class AdaptiveRKVDiversityCalculator {
         }
     }
 
-    /** For a rank-3 tensor, zeroes out the values that are less than the mean of the values of the corresponding slice at rank 2 (zero-based). Ranks 1 and 2 of the input tensor must be equal. Mean values are computed and provided externally. The operation is done in-place.
+    /** For a rank-3 tensor, zeroes out the values that are less than the mean of the values of the corresponding slice
+     * at rank 2 (zero-based). Ranks 1 and 2 of the input tensor must be equal. Mean values are computed and provided
+     * externally. The operation is done in-place.
      * @param in_out Pointer to the tensor data.
      * @param in_out_shape Shape of the tensor data. Expected shape is [num_heads, token_dim, token_dim].
-     * @param means Pointer to the tensor data containing the means of each slice of the `in_out` tensor along its rank 2 (zero-based).
+     * @param means Pointer to the tensor data containing the means of each slice of the `in_out` tensor along its rank
+     * 2 (zero-based).
      * @param means_shape Shape of the means tensor. Expected shape is [num_heads, token_dim].
      */
-    void fill_low_values_with_zeros_(T* in_out,
-                                     const Shape& in_out_shape,
-                                     const T* means,
-                                     const Shape& means_shape) {
+    void fill_low_values_with_zeros_(T* in_out, const Shape& in_out_shape, const T* means, const Shape& means_shape) {
         OPENVINO_ASSERT(in_out_shape.size() == 3);  // [num_heads, token_dim, token_dim]
         OPENVINO_ASSERT(in_out_shape[1] == in_out_shape[2]);
-        OPENVINO_ASSERT(means_shape.size() == 2);   // [num_heads, token_dim]
+        OPENVINO_ASSERT(means_shape.size() == 2);  // [num_heads, token_dim]
         OPENVINO_ASSERT(means_shape[0] == in_out_shape[0]);
         OPENVINO_ASSERT(means_shape[1] == in_out_shape[1]);
 
@@ -96,14 +93,12 @@ class AdaptiveRKVDiversityCalculator {
 
     /** For a square matrix, sums each `block_size`-sized group of matrix rows to produce a row in the output matrix.
      * @param in_data Pointer to the matrix data.
-     * @param in_shape Shape of the matrix data. Expected shape is [token_dim, token_dim], where token_dim must be a multiple of `block_size`.
+     * @param in_shape Shape of the matrix data. Expected shape is [token_dim, token_dim], where token_dim must be a
+     * multiple of `block_size`.
      * @param out Pointer to the output matrix data.
      * @param out_shape Shape of the output matrix. Expected shape is [token_dim / block_size, token_dim].
      */
-    void block_sum_diversity_values(const T* in_data,
-                                    const Shape& in_shape,
-                                    T* out,
-                                    const Shape& out_shape) {
+    void block_sum_diversity_values(const T* in_data, const Shape& in_shape, T* out, const Shape& out_shape) {
         OPENVINO_ASSERT(in_shape.size() == 2);  // [token_dim, token_dim]
         OPENVINO_ASSERT(in_shape[0] == in_shape[1]);
         OPENVINO_ASSERT(in_shape[0] % m_block_size == 0);
@@ -117,11 +112,11 @@ class AdaptiveRKVDiversityCalculator {
         for (size_t out_block_dim_idx = 0; out_block_dim_idx < out_shape[0]; out_block_dim_idx++) {
             size_t out_block_offset = out_block_dim_idx * out_shape[1];
             for (size_t out_token_dim_idx = 0; out_token_dim_idx < out_shape[1]; out_token_dim_idx++) {
-               size_t in_block_offset = (out_block_dim_idx * m_block_size) * out_shape[1];
-               for (size_t in_token_in_block_idx = 0; in_token_in_block_idx < m_block_size; in_token_in_block_idx++) {
-                  size_t source_offset = in_block_offset + in_token_in_block_idx * in_shape[1] + out_token_dim_idx;
-                  out[out_block_offset + out_token_dim_idx] -= in_data[source_offset];
-               }
+                size_t in_block_offset = (out_block_dim_idx * m_block_size) * out_shape[1];
+                for (size_t in_token_in_block_idx = 0; in_token_in_block_idx < m_block_size; in_token_in_block_idx++) {
+                    size_t source_offset = in_block_offset + in_token_in_block_idx * in_shape[1] + out_token_dim_idx;
+                    out[out_block_offset + out_token_dim_idx] -= in_data[source_offset];
+                }
             }
         }
     }
@@ -131,37 +126,54 @@ class AdaptiveRKVDiversityCalculator {
      * that the 1-st rank is left unaggregated when compared to the full diversity calculation algorithm. The reason
      * for this is as follows. The final per-block diversity value computation relies on knowing the subset of blocks
      * in the eviction area that will be retained regardless of calculated diversity. This subset must be filtered out
-     * from the rank-1 dimension when performing reduce-mean in the original algorithm to get 1 diversity value per block
-     * in the eviction area. Due to implementation specifics the paged attention kernel does not know ahead of time which
-     * blocks will be "retained" - this information is only available on the openvino.genai level after the PA kernel has executed.
-     * Therefore the PA kernel will provide raw per-token values on the rank 1 of the returned diversity value matrix and delegatei
-     * the final reduce-mean and filtering to the openvino.genai level.
+     * from the rank-1 dimension when performing reduce-mean in the original algorithm to get 1 diversity value per
+     * block in the eviction area. Due to implementation specifics the paged attention kernel does not know ahead of
+     * time which blocks will be "retained" - this information is only available on the openvino.genai level after the
+     * PA kernel has executed. Therefore the PA kernel will provide raw per-token values on the rank 1 of the returned
+     * diversity value matrix and delegatei the final reduce-mean and filtering to the openvino.genai level.
      * @param key_data Pointer to the key cache tensor data
      * @param key_shape Shape of the key input tensor data. Expected shape is [num_heads, num_key_tokens, head_size],
      * where `num_key_tokens` must be no less than `start_size + eviction_size`.
-     * @return A rank-2 matrix in the std::vector representation with dimensions [eviction_size / block_size, eviction_size] containing
-     * the diversity values. The values are expected to be further mean-reduced along rank 1 (zero-based) at the point in time when the
-     * subset of blocks to be exclusively retained is known.
+     * @return A rank-2 matrix in the std::vector representation with dimensions [eviction_size / block_size,
+     * eviction_size] containing the diversity values. The values are expected to be further mean-reduced along rank 1
+     * (zero-based) at the point in time when the subset of blocks to be exclusively retained is known.
      */
-    std::vector<std::vector<T>> calculate_block_diversity(const T* key_data,
-                                                  const Shape& key_shape) {
-        OPENVINO_ASSERT(key_shape.size() == 3);    // [num_heads, key_token_len, head_dim]
+    std::vector<std::vector<T>> calculate_block_diversity(const T* key_data, const Shape& key_shape) {
+        OPENVINO_ASSERT(key_shape.size() == 3);  // [num_heads, key_token_len, head_dim]
         OPENVINO_ASSERT(key_shape[1] >= m_start_size + m_eviction_size);
 
-
         auto normalized_key_data_buf = allocate_buf(key_shape);
         // Should be safe to use this in-place
-        ov::reference::normalize_l2(key_data, normalized_key_data_buf.get(), key_shape, {2}, std::numeric_limits<float>::epsilon(), ov::op::EpsMode::ADD);
+        ov::reference::normalize_l2(key_data,
+                                    normalized_key_data_buf.get(),
+                                    key_shape,
+                                    {2},
+                                    std::numeric_limits<float>::epsilon(),
+                                    ov::op::EpsMode::ADD);
 
         Shape cos_similar_shape = {key_shape[0], key_shape[1], key_shape[1]};
         auto cos_similar_buf = allocate_buf(cos_similar_shape);
-        ov::reference::matmul(normalized_key_data_buf.get(), normalized_key_data_buf.get(), cos_similar_buf.get(), key_shape, key_shape, cos_similar_shape, /* transpose_arg0 = */ false, /* transpose_arg1 = */ true);
+        ov::reference::matmul(normalized_key_data_buf.get(),
+                              normalized_key_data_buf.get(),
+                              cos_similar_buf.get(),
+                              key_shape,
+                              key_shape,
+                              cos_similar_shape,
+                              /* transpose_arg0 = */ false,
+                              /* transpose_arg1 = */ true);
         normalized_key_data_buf.reset();
 
         Shape evictable_subset_shape = {key_shape[0], m_eviction_size, m_eviction_size};
         auto evictable_subset_buf = allocate_buf(evictable_subset_shape);
         // stops?
-        ov::reference::slice(reinterpret_cast<char*>(cos_similar_buf.get()), cos_similar_shape, reinterpret_cast<char*>(evictable_subset_buf.get()), evictable_subset_shape, sizeof(T), /* starts = */ {m_start_size, m_start_size}, /* steps = */ {1, 1}, /* axes = */{1, 2});
+        ov::reference::slice(reinterpret_cast<char*>(cos_similar_buf.get()),
+                             cos_similar_shape,
+                             reinterpret_cast<char*>(evictable_subset_buf.get()),
+                             evictable_subset_shape,
+                             sizeof(T),
+                             /* starts = */ {m_start_size, m_start_size},
+                             /* steps = */ {1, 1},
+                             /* axes = */ {1, 2});
         cos_similar_buf.reset();
 
         fill_diagonal_(evictable_subset_buf.get(), evictable_subset_shape, 0.0);
@@ -175,12 +187,18 @@ class AdaptiveRKVDiversityCalculator {
 
         Shape aggregated_token_similarities_shape = {m_eviction_size, m_eviction_size};
         auto aggregated_token_similarities_buf = allocate_buf(aggregated_token_similarities_shape);
-        ov::reference::reduce_mean(evictable_subset_buf.get(), aggregated_token_similarities_buf.get(), evictable_subset_shape, {0});
+        ov::reference::reduce_mean(evictable_subset_buf.get(),
+                                   aggregated_token_similarities_buf.get(),
+                                   evictable_subset_shape,
+                                   {0});
         evictable_subset_buf.reset();
 
         Shape block_diversity_shape = {m_eviction_size / m_block_size, m_eviction_size};
         auto block_diversity_buf = allocate_buf(block_diversity_shape);
-        block_sum_diversity_values(aggregated_token_similarities_buf.get(), aggregated_token_similarities_shape, block_diversity_buf.get(), block_diversity_shape);
+        block_sum_diversity_values(aggregated_token_similarities_buf.get(),
+                                   aggregated_token_similarities_shape,
+                                   block_diversity_buf.get(),
+                                   block_diversity_shape);
         std::vector<std::vector<T>> retval(block_diversity_shape[0], std::vector<T>(block_diversity_shape[1]));
         for (size_t block_idx = 0; block_idx < block_diversity_shape[0]; block_idx++) {
             for (size_t token_idx = 0; token_idx < block_diversity_shape[1]; token_idx++) {
@@ -199,7 +217,6 @@ class AdaptiveRKVDiversityCalculator {
         return std::shared_ptr<T[]>(new T[ov::shape_size(shape)]);
     }
 
-
     size_t m_start_size;
     size_t m_eviction_size;
     size_t m_block_size;
diff --git a/src/core/tests/reference/adaptive_rkv_diversity.cpp b/src/core/tests/reference/adaptive_rkv_diversity.cpp
index 6595162e1aa3ec..d3f71cdd3758c9 100644
--- a/src/core/tests/reference/adaptive_rkv_diversity.cpp
+++ b/src/core/tests/reference/adaptive_rkv_diversity.cpp
@@ -12,7 +12,6 @@ size_t DEFAULT_BLOCK_SIZE = 2;
 size_t DEFAULT_START_SIZE = 2;
 size_t DEFAULT_EVICTION_SIZE = 10;
 
-
 TEST(AdaptiveRKVE2ESmokeTest, CalculatesDiversityWithoutThrowing) {
     ov::reference::AdaptiveRKVDiversityCalculator<double> calculator(DEFAULT_START_SIZE,
                                                                      DEFAULT_EVICTION_SIZE,
@@ -24,7 +23,6 @@ TEST(AdaptiveRKVE2ESmokeTest, CalculatesDiversityWithoutThrowing) {
     EXPECT_NO_THROW(calculator.calculate_block_diversity(mock_data.data(), mock_shape));
 };
 
-
 struct FillDiagonalTestData {
     ov::Shape in_shape;
     std::vector<double> in_data;
@@ -33,10 +31,9 @@ struct FillDiagonalTestData {
 
 using AdaptiveRKVDiversityFillDiagonalTest = ::testing::TestWithParam<FillDiagonalTestData>;
 
-std::vector<FillDiagonalTestData> FILL_DIAGONAL_TEST_CASES = {
-    {
-        {2, 4, 4},
-        // clang-format off
+std::vector<FillDiagonalTestData> FILL_DIAGONAL_TEST_CASES = {{
+    {2, 4, 4},
+    // clang-format off
         {
              3.144,  8.512,  8.518, -8.386,
              7.889, -5.721,  5.507,  4.295,
@@ -48,9 +45,9 @@ std::vector<FillDiagonalTestData> FILL_DIAGONAL_TEST_CASES = {
              3.469,  7.633,  7.244, -6.844,
             -7.173,  4.450,  6.705, -7.035
         },
-        // clang-format on
+    // clang-format on
 
-        // clang-format off
+    // clang-format off
         {
              42.00,  8.512,  8.518, -8.386,
              7.889,  42.00,  5.507,  4.295,
@@ -62,21 +59,20 @@ std::vector<FillDiagonalTestData> FILL_DIAGONAL_TEST_CASES = {
              3.469,  7.633,  42.00, -6.844,
             -7.173,  4.450,  6.705,  42.00
         },
-        // clang-format on
-    }
-};
+    // clang-format on
+}};
 
 TEST_P(AdaptiveRKVDiversityFillDiagonalTest, FillsDiagonal) {
     auto test_struct = GetParam();
     ASSERT_EQ(test_struct.in_data.size(), ov::shape_size(test_struct.in_shape));
     ASSERT_EQ(test_struct.ref_out_data.size(), ov::shape_size(test_struct.in_shape));
 
-    ov::reference::AdaptiveRKVDiversityCalculator<double> calculator(DEFAULT_START_SIZE, DEFAULT_EVICTION_SIZE, DEFAULT_BLOCK_SIZE);
+    ov::reference::AdaptiveRKVDiversityCalculator<double> calculator(DEFAULT_START_SIZE,
+                                                                     DEFAULT_EVICTION_SIZE,
+                                                                     DEFAULT_BLOCK_SIZE);
 
     std::vector<double> test_out_data = test_struct.in_data;
-    calculator.fill_diagonal_(test_out_data.data(),
-                              test_struct.in_shape,
-                              42.0);
+    calculator.fill_diagonal_(test_out_data.data(), test_struct.in_shape, 42.0);
     EXPECT_EQ(test_out_data, test_struct.ref_out_data);
 }
 
@@ -152,7 +148,10 @@ TEST_P(AdaptiveRKVFillLowValuesWithZerosTest, FillsLowValuesWithZero) {
                                                                      DEFAULT_EVICTION_SIZE,
                                                                      DEFAULT_BLOCK_SIZE);
     std::vector<double> test_out_data = test_struct.in_data;
-    calculator.fill_low_values_with_zeros_(test_out_data.data(), test_struct.in_shape, test_struct.means.data(), test_struct.means_shape);
+    calculator.fill_low_values_with_zeros_(test_out_data.data(),
+                                           test_struct.in_shape,
+                                           test_struct.means.data(),
+                                           test_struct.means_shape);
 
     EXPECT_THAT(test_out_data, ::testing::Pointwise(::testing::DoubleNear(1e-8), test_struct.ref_out_data));
 }
@@ -161,7 +160,6 @@ INSTANTIATE_TEST_SUITE_P(VariousInputs,
                          AdaptiveRKVFillLowValuesWithZerosTest,
                          ::testing::ValuesIn(FILL_LOW_VALUES_WITH_ZEROS_TEST_CASES));
 
-
 struct BlockSumTestData {
     ov::Shape in_shape;
     std::vector<double> in_data;
@@ -409,17 +407,14 @@ std::vector<E2EDiversityTestData> E2E_DIVERSITY_TEST_CASES = {
              -9.120, -7.228, -9.186,  3.202,
              -9.304, -0.401, -5.287,  6.834
         },
-    // clang-format on
+        // clang-format on
 
         /* start_size = */ 2,
         /* eviction_size = */ 6,
-        {
-            {-0.237145, -0.237145, -0.352696, -0.487902, -0.072365, -0.707192},
-            {-0.334657, -0.505941, 0, 0.036135, -0.634881,-0.490221},
-            {-0.380811, -0.398746801, -0.432080003, -0.693021748, 0, 0.067216441}
-        },
-    }
-};
+        {{-0.237145, -0.237145, -0.352696, -0.487902, -0.072365, -0.707192},
+         {-0.334657, -0.505941, 0, 0.036135, -0.634881, -0.490221},
+         {-0.380811, -0.398746801, -0.432080003, -0.693021748, 0, 0.067216441}},
+    }};
 
 TEST_P(AdaptiveRKVE2EDiversityTest, CalculatesDiversityCorrectly) {
     auto test_struct = GetParam();
@@ -434,10 +429,10 @@ TEST_P(AdaptiveRKVE2EDiversityTest, CalculatesDiversityCorrectly) {
     }
 
     for (size_t i = 0; i < test_diversity.size(); i++) {
-        EXPECT_THAT(test_diversity[i], ::testing::Pointwise(::testing::DoubleNear(1e-6), test_struct.ref_diversity_data[i]));
+        EXPECT_THAT(test_diversity[i],
+                    ::testing::Pointwise(::testing::DoubleNear(1e-6), test_struct.ref_diversity_data[i]));
     }
-
 };
 
 INSTANTIATE_TEST_SUITE_P(VariousInputs, AdaptiveRKVE2EDiversityTest, ::testing::ValuesIn(E2E_DIVERSITY_TEST_CASES));
-}
+}  // namespace adaptive_rkv_test

From fde6e790feb7ab8eead4a524fa86887b6af2a46a Mon Sep 17 00:00:00 2001
From: Vasily Shamporov <vasily.shamporov@intel.com>
Date: Thu, 9 Oct 2025 10:55:20 +0200
Subject: [PATCH 5/8] Fix comments

---
 .../openvino/reference/adaptive_rkv_diversity.hpp | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp b/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp
index 0da9a9bbfb6cc7..5e10e251f95b59 100644
--- a/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp
+++ b/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp
@@ -4,17 +4,11 @@
 
 #pragma once
 
-#include <cmath>
-#include <cstddef>
-#include <memory>
-#include <queue>
-
 #include "openvino/op/util/attr_types.hpp"
 #include "openvino/reference/matmul.hpp"
 #include "openvino/reference/normalize_l2.hpp"
 #include "openvino/reference/reduce_mean.hpp"
 #include "openvino/reference/slice.hpp"
-#include "openvino/runtime/tensor.hpp"
 
 namespace ov::reference {
 
@@ -27,7 +21,7 @@ class AdaptiveRKVDiversityCalculator {
      * calculation, starting from the beginning of the token dimension ("start area"). Must be a multiple of
      * `block_size`.
      * @param eviction_size Size, in tokens, from the beginning of the start area, the tokens in which will be
-     * considred for purposes of diversity calculation ("eviction area"). The rest of the tokens after the eviction
+     * considered for purposes of diversity calculation ("eviction area"). The rest of the tokens after the eviction
      * area, if any, are ignored. Must be a multiple of `block_size`.
      * @param block_size Block size of the underlying paged attention implementation. The diversity values will be
      * sum-reduced from per-token values to per-block values based on this number of tokens in a block.
@@ -92,6 +86,8 @@ class AdaptiveRKVDiversityCalculator {
     }
 
     /** For a square matrix, sums each `block_size`-sized group of matrix rows to produce a row in the output matrix.
+     * In the overall algorithm context, each summed value represents diversity (the negative of inter-token cosine
+     * similarity), where larger (i.e. less negative) values indicate greater diversity.
      * @param in_data Pointer to the matrix data.
      * @param in_shape Shape of the matrix data. Expected shape is [token_dim, token_dim], where token_dim must be a
      * multiple of `block_size`.
@@ -130,7 +126,7 @@ class AdaptiveRKVDiversityCalculator {
      * block in the eviction area. Due to implementation specifics the paged attention kernel does not know ahead of
      * time which blocks will be "retained" - this information is only available on the openvino.genai level after the
      * PA kernel has executed. Therefore the PA kernel will provide raw per-token values on the rank 1 of the returned
-     * diversity value matrix and delegatei the final reduce-mean and filtering to the openvino.genai level.
+     * diversity value matrix and delegate the final reduce-mean and filtering to the openvino.genai level.
      * @param key_data Pointer to the key cache tensor data
      * @param key_shape Shape of the key input tensor data. Expected shape is [num_heads, num_key_tokens, head_size],
      * where `num_key_tokens` must be no less than `start_size + eviction_size`.
@@ -165,7 +161,6 @@ class AdaptiveRKVDiversityCalculator {
 
         Shape evictable_subset_shape = {key_shape[0], m_eviction_size, m_eviction_size};
         auto evictable_subset_buf = allocate_buf(evictable_subset_shape);
-        // stops?
         ov::reference::slice(reinterpret_cast<char*>(cos_similar_buf.get()),
                              cos_similar_shape,
                              reinterpret_cast<char*>(evictable_subset_buf.get()),
@@ -173,7 +168,7 @@ class AdaptiveRKVDiversityCalculator {
                              sizeof(T),
                              /* starts = */ {m_start_size, m_start_size},
                              /* steps = */ {1, 1},
-                             /* axes = */ {1, 2});
+                             /* axes = */ {1, 2}); // stops are defined by output shape
         cos_similar_buf.reset();
 
         fill_diagonal_(evictable_subset_buf.get(), evictable_subset_shape, 0.0);

From 809acff183d4ef6f9c1fd99a4cef0da6120c9434 Mon Sep 17 00:00:00 2001
From: Vasily Shamporov <vasily.shamporov@intel.com>
Date: Thu, 9 Oct 2025 13:38:12 +0200
Subject: [PATCH 6/8] Fix format

---
 .../include/openvino/reference/adaptive_rkv_diversity.hpp       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp b/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp
index 5e10e251f95b59..9ddd371c5df14a 100644
--- a/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp
+++ b/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp
@@ -168,7 +168,7 @@ class AdaptiveRKVDiversityCalculator {
                              sizeof(T),
                              /* starts = */ {m_start_size, m_start_size},
                              /* steps = */ {1, 1},
-                             /* axes = */ {1, 2}); // stops are defined by output shape
+                             /* axes = */ {1, 2});  // stops are defined by output shape
         cos_similar_buf.reset();
 
         fill_diagonal_(evictable_subset_buf.get(), evictable_subset_shape, 0.0);

From 44000b48dd49c3e53893ff5e92d2220e245ff897 Mon Sep 17 00:00:00 2001
From: Vasily Shamporov <vasily.shamporov@intel.com>
Date: Fri, 10 Oct 2025 10:59:03 +0200
Subject: [PATCH 7/8] Fix warnings

---
 .../include/openvino/reference/adaptive_rkv_diversity.hpp       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp b/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp
index 9ddd371c5df14a..2a0c4b7bca56a2 100644
--- a/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp
+++ b/src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp
@@ -166,7 +166,7 @@ class AdaptiveRKVDiversityCalculator {
                              reinterpret_cast<char*>(evictable_subset_buf.get()),
                              evictable_subset_shape,
                              sizeof(T),
-                             /* starts = */ {m_start_size, m_start_size},
+                             /* starts = */ {static_cast<int64_t>(m_start_size), static_cast<int64_t>(m_start_size)},
                              /* steps = */ {1, 1},
                              /* axes = */ {1, 2});  // stops are defined by output shape
         cos_similar_buf.reset();

From 82d425c5d91a2bf26d322c039ad2d2a2cfc0948b Mon Sep 17 00:00:00 2001
From: Vasily Shamporov <vasily.shamporov@intel.com>
Date: Sat, 11 Oct 2025 11:16:53 +0200
Subject: [PATCH 8/8] mt