Skip to content

Commit f04e2c0

Browse files
Address Review Comments
1 parent 13103cb commit f04e2c0

File tree

1 file changed

+13
-9
lines changed

1 file changed

+13
-9
lines changed

src/plugins/intel_npu/src/plugin/npuw/llm_eagle3_extension.cpp

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -196,23 +196,27 @@ void Eagle3Extension::accumulate_chunk_last_hidden_state(
196196
const auto& target_shape = m_last_hidden_state->get_shape();
197197
const uint32_t target_total_len = static_cast<uint32_t>(target_shape[1]);
198198

199+
OPENVINO_ASSERT(target_total_len == total_seq_len,
200+
"Pre-allocated tensor size (" + std::to_string(target_total_len) + ") must match total_seq_len (" +
201+
std::to_string(total_seq_len) + ")");
202+
199203
OPENVINO_ASSERT(m_chunked_seq_offset + chunk_token_count <= target_total_len,
200-
"Chunked sequence offset exceeds pre-allocated size");
204+
"Can't write chunk by stored chunked sequence offset and requested number of tokens, as it will "
205+
"exceed pre-allocated size");
201206

202207
// Extract only the rightmost chunk_token_count tokens from the output
203208
// The chunk_output is right-aligned with padding on the left
209+
constexpr uint32_t seq_dim = 1;
204210
const uint32_t chunk_start_offset = chunk_seq_len - chunk_token_count;
205-
const size_t hidden_elem_size = chunk_output->get_element_type().size();
206-
const size_t row_bytes = hidden_size * hidden_elem_size;
207211

208-
const uint8_t* chunk_ptr = reinterpret_cast<const uint8_t*>(chunk_output->data());
209-
chunk_ptr += chunk_start_offset * row_bytes; // Skip padding, point to valid tokens
212+
auto chunk_output_slice = util::make_tensor_slice(chunk_output, seq_dim, chunk_start_offset, chunk_seq_len);
210213

211-
// Copy chunk data directly to the correct position in pre-allocated tensor
212-
uint8_t* dst_ptr = reinterpret_cast<uint8_t*>(m_last_hidden_state->data());
213-
dst_ptr += m_chunked_seq_offset * row_bytes; // Move to the current write position
214+
auto target_slice = util::make_tensor_slice(m_last_hidden_state,
215+
seq_dim,
216+
m_chunked_seq_offset,
217+
m_chunked_seq_offset + chunk_token_count);
214218

215-
std::copy_n(chunk_ptr, chunk_token_count * row_bytes, dst_ptr);
219+
chunk_output_slice->copy_to(target_slice._ptr);
216220

217221
LOG_VERB("Eagle3: Copied chunk [" << chunk_start_offset << ":" << chunk_seq_len << "] to position ["
218222
<< m_chunked_seq_offset << ":" << (m_chunked_seq_offset + chunk_token_count)

0 commit comments

Comments
 (0)