Skip to content

Commit 84877e7

Browse files
authored
[INTEL_HPU] added rebuild_padding_v3 for fastdeploy/pp opt (#1865)
1 parent a4c2f9e commit 84877e7

File tree

1 file changed

+20
-0
lines changed

1 file changed

+20
-0
lines changed

backends/intel_hpu/custom_ops/python/paddlenlp_ops/llama_block_atten.py

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -235,6 +235,26 @@ def rebuild_padding_v2(
235235
return output_data
236236

237237

238+
def rebuild_padding_v3(
    tmp_out,
    batch_ids,
    total_batch,
    seq_lens_encoder,
    is_prompt=None,
):
    """Select one embedding row from ``tmp_out`` per entry of ``batch_ids``.

    The result has ``batch_ids.shape[0]`` rows and ``tmp_out.shape[-1]``
    columns. Row ``i`` of the result corresponds to ``batch_ids[i]``.

    Args:
        tmp_out: Tensor-like whose last dimension is the embedding width.
            On the ``is_prompt is True`` path it must be reshapeable to
            ``[total_batch, -1, dim_emb]``.
        batch_ids: Tensor-like; its leading dimension gives the number of
            output rows, and each element indexes ``seq_lens_encoder``.
        total_batch: Leading dimension used when reshaping ``tmp_out`` on
            the ``is_prompt is True`` path.
        seq_lens_encoder: Tensor-like of per-slot sequence lengths, indexed
            by the values stored in ``batch_ids``.
        is_prompt: Mode selector, compared by identity against ``True`` and
            ``False``.

    Returns:
        * ``is_prompt is True``: a freshly allocated tensor whose row ``i``
          is ``tmp_out[i, seq_len - 1]`` after the reshape, with
          ``seq_len = seq_lens_encoder[batch_ids[i]]``.
        * ``is_prompt is False``: the slice ``tmp_out[:batch_ids.shape[0], :]``.
        * anything else (including the default ``None``): an all-zero tensor.
    """
    hidden_size = tmp_out.shape[-1]
    num_rows = batch_ids.shape[0]

    # NOTE(review): no dtype is passed, so this buffer uses paddle's default
    # dtype, whereas the ``is_prompt is False`` path returns a slice of
    # ``tmp_out`` itself -- confirm callers are fine with that difference.
    gathered = paddle.zeros((num_rows, hidden_size))

    if is_prompt is False:
        # Presumably the decode phase: the leading rows are returned as-is.
        return tmp_out[:num_rows, :]

    if is_prompt is not True:
        # Neither True nor False (e.g. the default None): zeros are returned.
        return gathered

    # context
    per_sequence = tmp_out.reshape([total_batch, -1, hidden_size])
    for row in range(num_rows):
        # The length is looked up through batch_ids[row], but the slab read
        # from per_sequence is addressed by the loop position ``row``.
        last_pos = seq_lens_encoder[batch_ids[row]].item() - 1
        gathered[row] = per_sequence[row, last_pos]

    return gathered
256+
257+
238258
def fused_flatpa_proj_ref(
239259
query,
240260
key_cache,

0 commit comments

Comments
 (0)