9
9
#include < memory>
10
10
#include < queue>
11
11
12
+ #include " openvino/op/util/attr_types.hpp"
12
13
#include " openvino/reference/matmul.hpp"
13
14
#include " openvino/reference/normalize_l2.hpp"
14
15
#include " openvino/reference/reduce_mean.hpp"
@@ -59,7 +60,7 @@ class AdaptiveRKVDiversityCalculator {
59
60
* @param out_shape Shape of the output tensor data. Expected shape is [num_heads, num_query_tokens / block_size,
60
61
* num_key_tokens / block_size].
61
62
*/
62
- void fill_diagonal_ (const T* in_out,
63
+ void fill_diagonal_ (T* in_out,
63
64
const Shape& in_out_shape,
64
65
T val) {
65
66
OPENVINO_ASSERT (in_out_shape.size () == 3 ); // [num_heads, token_dim, token_dim]
@@ -76,7 +77,7 @@ class AdaptiveRKVDiversityCalculator {
76
77
}
77
78
}
78
79
79
- void fill_low_values_with_zeros_ (const T* in_out,
80
+ void fill_low_values_with_zeros_ (T* in_out,
80
81
const Shape& in_out_shape,
81
82
const T* means,
82
83
const Shape& means_shape) {
@@ -121,7 +122,7 @@ class AdaptiveRKVDiversityCalculator {
121
122
size_t in_block_offset = (out_block_dim_idx * m_block_size) * out_shape[1 ];
122
123
for (size_t in_token_in_block_idx = 0 ; in_token_in_block_idx < m_block_size; in_token_in_block_idx++) {
123
124
size_t source_offset = in_block_offset + in_token_in_block_idx * processed_similarity_token_data_shape[1 ] + out_token_dim_idx;
124
- out[out_block_offset + out_token_dim_idx] + = processed_similarity_token_data[source_offset];
125
+ out[out_block_offset + out_token_dim_idx] - = processed_similarity_token_data[source_offset];
125
126
}
126
127
}
127
128
}
@@ -146,19 +147,22 @@ class AdaptiveRKVDiversityCalculator {
146
147
std::vector<std::vector<T>> calculate_block_diversity (const T* key_data,
147
148
const Shape& key_shape) {
148
149
OPENVINO_ASSERT (key_shape.size () == 3 ); // [num_heads, key_token_len, head_dim]
149
- OPENVINO_ASSERT (key_shape[1 ] >= m_block_size * ( m_start_size + m_eviction_size) );
150
+ OPENVINO_ASSERT (key_shape[1 ] >= m_start_size + m_eviction_size);
150
151
152
+
153
+ auto normalized_key_data_buf = allocate_buf (key_shape);
151
154
// Normalize into a separate scratch buffer; the input key_data is left unmodified
152
- ov::reference::normalize_l2 (key_data, key_data , key_shape, {2 }, std::numeric_limits<T >::epsilon ());
155
+ ov::reference::normalize_l2 (key_data, normalized_key_data_buf. get () , key_shape, {2 }, std::numeric_limits<float >::epsilon (), ov::op::EpsMode::ADD );
153
156
154
157
Shape cos_similar_shape = {key_shape[0 ], key_shape[1 ], key_shape[1 ]};
155
158
auto cos_similar_buf = allocate_buf (cos_similar_shape);
156
- ov::reference::matmul (key_data, key_data, cos_similar_buf.get (), key_shape, key_shape, cos_similar_shape, /* transpose_arg0 = */ false , /* transpose_arg1 = */ true );
159
+ ov::reference::matmul (normalized_key_data_buf.get (), normalized_key_data_buf.get (), cos_similar_buf.get (), key_shape, key_shape, cos_similar_shape, /* transpose_arg0 = */ false , /* transpose_arg1 = */ true );
160
+ normalized_key_data_buf.reset ();
157
161
158
162
Shape evictable_subset_shape = {key_shape[0 ], m_eviction_size, m_eviction_size};
159
163
auto evictable_subset_buf = allocate_buf (evictable_subset_shape);
160
164
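// NOTE(review): no `stops` are passed to slice; the extent presumably comes from evictable_subset_shape - confirm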
161
- ov::reference::slice (cos_similar_buf.get (), cos_similar_shape, evictable_subset_buf.get (), evictable_subset_shape, sizeof (T), /* starts = */ {m_start_size, m_start_size}, /* steps = */ {1 , 1 }, /* axes = */ {1 , 2 });
165
+ ov::reference::slice (reinterpret_cast < char *>( cos_similar_buf.get ()) , cos_similar_shape, reinterpret_cast < char *>( evictable_subset_buf.get () ), evictable_subset_shape, sizeof (T), /* starts = */ {m_start_size, m_start_size}, /* steps = */ {1 , 1 }, /* axes = */ {1 , 2 });
162
166
cos_similar_buf.reset ();
163
167
164
168
fill_diagonal_ (evictable_subset_buf.get (), evictable_subset_shape, 0.0 );
@@ -168,6 +172,7 @@ class AdaptiveRKVDiversityCalculator {
168
172
ov::reference::reduce_mean (evictable_subset_buf.get (), means_buf.get (), evictable_subset_shape, {2 });
169
173
170
174
fill_low_values_with_zeros_ (evictable_subset_buf.get (), evictable_subset_shape, means_buf.get (), means_shape);
175
+ means_buf.reset ();
171
176
172
177
Shape aggregated_token_similarities_shape = {m_eviction_size, m_eviction_size};
173
178
auto aggregated_token_similarities_buf = allocate_buf (aggregated_token_similarities_shape);
@@ -180,7 +185,7 @@ class AdaptiveRKVDiversityCalculator {
180
185
std::vector<std::vector<T>> retval (block_diversity_shape[0 ], std::vector<T>(block_diversity_shape[1 ]));
181
186
for (size_t block_idx = 0 ; block_idx < block_diversity_shape[0 ]; block_idx++) {
182
187
for (size_t token_idx = 0 ; token_idx < block_diversity_shape[1 ]; token_idx++) {
183
- retval[block_idx][token_idx] = block_diversity_buf. get () + block_idx * block_diversity_shape[1 ] + token_idx;
188
+ retval[block_idx][token_idx] = block_diversity_buf[ block_idx * block_diversity_shape[1 ] + token_idx] ;
184
189
}
185
190
}
186
191
0 commit comments