Skip to content

Commit 59ad44e

Browse files
committed
bug fix
1 parent 978be5b commit 59ad44e

File tree

4 files changed

+530
-920
lines changed

4 files changed

+530
-920
lines changed

fastdeploy/model_executor/layers/sample/sampler.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -335,7 +335,12 @@ def forward_npu(
335 335

336 336
_, next_tokens = top_p_sampling_npu(probs, sampling_metadata.top_p)
337 337

338-
return next_tokens
338+
sampler_output = SamplerOutput(
339+
sampled_token_ids=next_tokens,
340+
logprobs_tensors=None,
341+
)
342+
343+
return sampler_output
339 344

340 345

341 346
class SpeculativeSampler(nn.Layer):

fastdeploy/model_executor/ops/npu/rebuild_padding.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,9 @@ def rebuild_padding(
12 12
padding_offset,
13 13
max_model_len
14 14
):
15-
model_output=paddle.cast(model_output, paddle.float16)
15+
# Cast to float16 for NPU kernel as required, then cast back to original dtype
16+
original_dtype = model_output.dtype
17+
model_output = paddle.cast(model_output, paddle.float16)
16 18

17 19
out = core.eager._run_custom_op(
18 20
"rebuild_padding_v2",
@@ -23,5 +25,8 @@ def rebuild_padding(
23 25
max_model_len
24 26
)[0]
25 27

28+
# Cast back to original dtype to maintain consistency
29+
out = paddle.cast(out, original_dtype)
30+
26 31

27 32
return out

0 commit comments

Comments
 (0)