Skip to content

Commit cd1c053

Browse files
committed
Fix comments
1 parent 4de52d3 commit cd1c053

File tree

1 file changed

+5
-10
lines changed

1 file changed

+5
-10
lines changed

src/core/reference/include/openvino/reference/adaptive_rkv_diversity.hpp

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,11 @@
44

55
#pragma once
66

7-
#include <cmath>
8-
#include <cstddef>
9-
#include <memory>
10-
#include <queue>
11-
127
#include "openvino/op/util/attr_types.hpp"
138
#include "openvino/reference/matmul.hpp"
149
#include "openvino/reference/normalize_l2.hpp"
1510
#include "openvino/reference/reduce_mean.hpp"
1611
#include "openvino/reference/slice.hpp"
17-
#include "openvino/runtime/tensor.hpp"
1812

1913
namespace ov::reference {
2014

@@ -27,7 +21,7 @@ class AdaptiveRKVDiversityCalculator {
2721
* calculation, starting from the beginning of the token dimension ("start area"). Must be a multiple of
2822
* `block_size`.
2923
* @param eviction_size Size, in tokens, of the area right after the start area, the tokens in which will be
30-
* considred for purposes of diversity calculation ("eviction area"). The rest of the tokens after the eviction
24+
* considered for purposes of diversity calculation ("eviction area"). The rest of the tokens after the eviction
3125
* area, if any, are ignored. Must be a multiple of `block_size`.
3226
* @param block_size Block size of the underlying paged attention implementation. The diversity values will be
3327
* sum-reduced from per-token values to per-block values based on this number of tokens in a block.
@@ -92,6 +86,8 @@ class AdaptiveRKVDiversityCalculator {
9286
}
9387

9488
/** For a square matrix, sums each `block_size`-sized group of matrix rows to produce a row in the output matrix.
89+
* In the overall algorithm context, each summed value represents diversity (the negative of inter-token cosine
90+
* similarity), where larger absolute values indicate greater diversity.
9591
* @param in_data Pointer to the matrix data.
9692
* @param in_shape Shape of the matrix data. Expected shape is [token_dim, token_dim], where token_dim must be a
9793
* multiple of `block_size`.
@@ -130,7 +126,7 @@ class AdaptiveRKVDiversityCalculator {
130126
* block in the eviction area. Due to implementation specifics the paged attention kernel does not know ahead of
131127
* time which blocks will be "retained" - this information is only available on the openvino.genai level after the
132128
* PA kernel has executed. Therefore the PA kernel will provide raw per-token values on the rank 1 of the returned
133-
* diversity value matrix and delegatei the final reduce-mean and filtering to the openvino.genai level.
129+
* diversity value matrix and delegate the final reduce-mean and filtering to the openvino.genai level.
134130
* @param key_data Pointer to the key cache tensor data
135131
* @param key_shape Shape of the key input tensor data. Expected shape is [num_heads, num_key_tokens, head_size],
136132
* where `num_key_tokens` must be no less than `start_size + eviction_size`.
@@ -165,15 +161,14 @@ class AdaptiveRKVDiversityCalculator {
165161

166162
Shape evictable_subset_shape = {key_shape[0], m_eviction_size, m_eviction_size};
167163
auto evictable_subset_buf = allocate_buf(evictable_subset_shape);
168-
// stops?
169164
ov::reference::slice(reinterpret_cast<char*>(cos_similar_buf.get()),
170165
cos_similar_shape,
171166
reinterpret_cast<char*>(evictable_subset_buf.get()),
172167
evictable_subset_shape,
173168
sizeof(T),
174169
/* starts = */ {m_start_size, m_start_size},
175170
/* steps = */ {1, 1},
176-
/* axes = */ {1, 2});
171+
/* axes = */ {1, 2}); // stops are defined by output shape
177172
cos_similar_buf.reset();
178173

179174
fill_diagonal_(evictable_subset_buf.get(), evictable_subset_shape, 0.0);

0 commit comments

Comments
 (0)