namespace ov::reference {

/** @brief Reference implementation of the Adaptive R-KV token diversity calculation mechanism
 * (https://arxiv.org/pdf/2505.24133v3) */
template <typename T>
class AdaptiveRKVDiversityCalculator {
public:
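    // Assumed layout along the token dimension of the key cache (illustrative):
    //
    //   [ start area         | eviction area          | remaining tokens       ]
    //     start_size tokens    eviction_size tokens     ignored, may be absent
    //
    // Diversity values are computed for the eviction area only and are reported per block of `block_size` tokens.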
    /** @param start_size Size, in tokens, of the key cache area that will be ignored for purposes of diversity
     * calculation, starting from the beginning of the token dimension ("start area"). Must be a multiple of
     * `block_size`.
     * @param eviction_size Size, in tokens, of the area immediately following the start area ("eviction area"), the
     * tokens in which will be considered for purposes of diversity calculation. The rest of the tokens after the
     * eviction area, if any, are ignored. Must be a multiple of `block_size`.
     * @param block_size Block size of the underlying paged attention implementation. The diversity values will be
     * sum-reduced from per-token values to per-block values based on this number of tokens in a block.
     */
    AdaptiveRKVDiversityCalculator(size_t start_size, size_t eviction_size, size_t block_size)
        : m_start_size(start_size),
@@ -46,13 +46,10 @@ class AdaptiveRKVDiversityCalculator {
     * @param in_out_shape Shape of the matrix data. Expected shape is [num_heads, token_dim, token_dim].
     * @param val Value to fill in the diagonal positions.
     */
    void fill_diagonal_(T* in_out, const Shape& in_out_shape, T val) {
        OPENVINO_ASSERT(in_out_shape.size() == 3);  // [num_heads, token_dim, token_dim]
        OPENVINO_ASSERT(in_out_shape[1] == in_out_shape[2]);  // [num_heads, token_dim, token_dim]

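        // Walk each head's square [token_dim, token_dim] slice, writing `val` into its main-diagonal entries.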
        for (size_t head_idx = 0; head_idx < in_out_shape[0]; head_idx++) {
            size_t in_head_offset = head_idx * in_out_shape[1] * in_out_shape[2];
            for (size_t token_dim_idx = 0; token_dim_idx < in_out_shape[1]; token_dim_idx++) {
@@ -63,19 +60,19 @@ class AdaptiveRKVDiversityCalculator {
        }
    }

    /** For a rank-3 tensor, zeroes out the values that are less than the mean of the values of the corresponding slice
     * at rank 2 (zero-based). Ranks 1 and 2 of the input tensor must be equal. Mean values are computed and provided
     * externally. The operation is done in-place.
     * @param in_out Pointer to the tensor data.
     * @param in_out_shape Shape of the tensor data. Expected shape is [num_heads, token_dim, token_dim].
     * @param means Pointer to the tensor data containing the means of each slice of the `in_out` tensor along its rank
     * 2 (zero-based).
     * @param means_shape Shape of the means tensor. Expected shape is [num_heads, token_dim].
     */
    void fill_low_values_with_zeros_(T* in_out, const Shape& in_out_shape, const T* means, const Shape& means_shape) {
        OPENVINO_ASSERT(in_out_shape.size() == 3);  // [num_heads, token_dim, token_dim]
        OPENVINO_ASSERT(in_out_shape[1] == in_out_shape[2]);
        OPENVINO_ASSERT(means_shape.size() == 2);  // [num_heads, token_dim]
        OPENVINO_ASSERT(means_shape[0] == in_out_shape[0]);
        OPENVINO_ASSERT(means_shape[1] == in_out_shape[1]);
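        // Per the description above: an entry in_out[h][i][j] is set to zero when it is less than means[h][i].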

@@ -96,14 +93,12 @@ class AdaptiveRKVDiversityCalculator {

    /** For a square matrix, sums each `block_size`-sized group of matrix rows to produce a row in the output matrix.
     * @param in_data Pointer to the matrix data.
     * @param in_shape Shape of the matrix data. Expected shape is [token_dim, token_dim], where token_dim must be a
     * multiple of `block_size`.
     * @param out Pointer to the output matrix data.
     * @param out_shape Shape of the output matrix. Expected shape is [token_dim / block_size, token_dim].
     */
    void block_sum_diversity_values(const T* in_data, const Shape& in_shape, T* out, const Shape& out_shape) {
        OPENVINO_ASSERT(in_shape.size() == 2);  // [token_dim, token_dim]
        OPENVINO_ASSERT(in_shape[0] == in_shape[1]);
        OPENVINO_ASSERT(in_shape[0] % m_block_size == 0);
@@ -117,11 +112,11 @@ class AdaptiveRKVDiversityCalculator {
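        // Note the `-=` below: each output entry accumulates the negated sum of the corresponding `block_size` input
        // rows, so larger summed similarities translate into lower (more negative) per-block diversity values.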
        for (size_t out_block_dim_idx = 0; out_block_dim_idx < out_shape[0]; out_block_dim_idx++) {
            size_t out_block_offset = out_block_dim_idx * out_shape[1];
            for (size_t out_token_dim_idx = 0; out_token_dim_idx < out_shape[1]; out_token_dim_idx++) {
                size_t in_block_offset = (out_block_dim_idx * m_block_size) * out_shape[1];
                for (size_t in_token_in_block_idx = 0; in_token_in_block_idx < m_block_size; in_token_in_block_idx++) {
                    size_t source_offset = in_block_offset + in_token_in_block_idx * in_shape[1] + out_token_dim_idx;
                    out[out_block_offset + out_token_dim_idx] -= in_data[source_offset];
                }
            }
        }
    }
@@ -131,37 +126,54 @@ class AdaptiveRKVDiversityCalculator {
     * that rank 1 is left unaggregated when compared to the full diversity calculation algorithm. The reason
     * for this is as follows. The final per-block diversity value computation relies on knowing the subset of blocks
     * in the eviction area that will be retained regardless of calculated diversity. This subset must be filtered out
     * from the rank-1 dimension when performing reduce-mean in the original algorithm to get 1 diversity value per
     * block in the eviction area. Due to implementation specifics the paged attention kernel does not know ahead of
     * time which blocks will be "retained" - this information is only available on the openvino.genai level after the
     * PA kernel has executed. Therefore the PA kernel will provide raw per-token values on rank 1 of the returned
     * diversity value matrix and delegate the final reduce-mean and filtering to the openvino.genai level.
     * @param key_data Pointer to the key cache tensor data
     * @param key_shape Shape of the key input tensor data. Expected shape is [num_heads, num_key_tokens, head_size],
     * where `num_key_tokens` must be no less than `start_size + eviction_size`.
     * @return A rank-2 matrix in the std::vector representation with dimensions [eviction_size / block_size,
     * eviction_size] containing the diversity values. The values are expected to be further mean-reduced along rank 1
     * (zero-based) at the point in time when the subset of blocks to be exclusively retained is known.
     */
    std::vector<std::vector<T>> calculate_block_diversity(const T* key_data, const Shape& key_shape) {
        OPENVINO_ASSERT(key_shape.size() == 3);  // [num_heads, key_token_len, head_dim]
        OPENVINO_ASSERT(key_shape[1] >= m_start_size + m_eviction_size);

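        // Pairwise cosine similarity between key tokens, per head: L2-normalize each key vector along the head
        // dimension, then multiply the normalized keys by their own transpose to get a
        // [num_heads, num_key_tokens, num_key_tokens] similarity matrix.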
        auto normalized_key_data_buf = allocate_buf(key_shape);
        // Should be safe to use this in-place
        ov::reference::normalize_l2(key_data,
                                    normalized_key_data_buf.get(),
                                    key_shape,
                                    {2},
                                    std::numeric_limits<float>::epsilon(),
                                    ov::op::EpsMode::ADD);

        Shape cos_similar_shape = {key_shape[0], key_shape[1], key_shape[1]};
        auto cos_similar_buf = allocate_buf(cos_similar_shape);
        ov::reference::matmul(normalized_key_data_buf.get(),
                              normalized_key_data_buf.get(),
                              cos_similar_buf.get(),
                              key_shape,
                              key_shape,
                              cos_similar_shape,
                              /* transpose_arg0 = */ false,
                              /* transpose_arg1 = */ true);
        normalized_key_data_buf.reset();

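        // Restrict the similarity matrix to the eviction area, i.e. tokens [start_size, start_size + eviction_size)
        // along both token dimensions.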
        Shape evictable_subset_shape = {key_shape[0], m_eviction_size, m_eviction_size};
        auto evictable_subset_buf = allocate_buf(evictable_subset_shape);
        // stops?
        ov::reference::slice(reinterpret_cast<char*>(cos_similar_buf.get()),
                             cos_similar_shape,
                             reinterpret_cast<char*>(evictable_subset_buf.get()),
                             evictable_subset_shape,
                             sizeof(T),
                             /* starts = */ {m_start_size, m_start_size},
                             /* steps = */ {1, 1},
                             /* axes = */ {1, 2});
        cos_similar_buf.reset();

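        // After L2 normalization every token's similarity with itself is ~1; zero the diagonal so self-similarity
        // does not contribute to the per-token diversity statistics.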
        fill_diagonal_(evictable_subset_buf.get(), evictable_subset_shape, 0.0);
@@ -175,12 +187,18 @@ class AdaptiveRKVDiversityCalculator {

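        // Average over the head dimension (axis 0): [num_heads, eviction_size, eviction_size] ->
        // [eviction_size, eviction_size].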
        Shape aggregated_token_similarities_shape = {m_eviction_size, m_eviction_size};
        auto aggregated_token_similarities_buf = allocate_buf(aggregated_token_similarities_shape);
        ov::reference::reduce_mean(evictable_subset_buf.get(),
                                   aggregated_token_similarities_buf.get(),
                                   evictable_subset_shape,
                                   {0});
        evictable_subset_buf.reset();

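        // Sum-reduce each group of `block_size` rows into one (negated) row per block of the eviction area.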
        Shape block_diversity_shape = {m_eviction_size / m_block_size, m_eviction_size};
        auto block_diversity_buf = allocate_buf(block_diversity_shape);
        block_sum_diversity_values(aggregated_token_similarities_buf.get(),
                                   aggregated_token_similarities_shape,
                                   block_diversity_buf.get(),
                                   block_diversity_shape);
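        // Repack the flat [eviction_size / block_size, eviction_size] buffer into the nested std::vector
        // representation returned to the caller.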
        std::vector<std::vector<T>> retval(block_diversity_shape[0], std::vector<T>(block_diversity_shape[1]));
        for (size_t block_idx = 0; block_idx < block_diversity_shape[0]; block_idx++) {
            for (size_t token_idx = 0; token_idx < block_diversity_shape[1]; token_idx++) {
@@ -199,7 +217,6 @@ class AdaptiveRKVDiversityCalculator {
        return std::shared_ptr<T[]>(new T[ov::shape_size(shape)]);
    }

    size_t m_start_size;
    size_t m_eviction_size;
    size_t m_block_size;
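
    // Minimal usage sketch (illustrative only; the sizes below are hypothetical and `key_cache` is assumed to have
    // been filled by the paged attention kernel elsewhere):
    //
    //   ov::Shape key_shape{num_heads, num_key_tokens, head_size};  // num_key_tokens >= start_size + eviction_size
    //   ov::reference::AdaptiveRKVDiversityCalculator<float> calc(/* start_size = */ 32,
    //                                                             /* eviction_size = */ 128,
    //                                                             /* block_size = */ 32);
    //   auto block_diversity = calc.calculate_block_diversity(key_cache.data(), key_shape);
    //   // block_diversity is [eviction_size / block_size][eviction_size]; the openvino.genai side is expected to
    //   // mean-reduce rank 1 after filtering out the blocks it retains unconditionally.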