We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent b38c808 · commit e684eb5 (Copy full SHA for e684eb5)
vllm/worker/hpu_model_runner.py
@@ -4024,7 +4024,8 @@ def try_revert_dummy_output_tokens():
4024
if self.do_mark_step:
4025
htorch.core.mark_step()
4026
if hasattr(self.model.sampler, '_sampling_tensors') and \
4027
- self.model.sampler._sampling_tensors is not None:
+ self.model.sampler._sampling_tensors is not None and \
4028
+ self.model.sampler._do_penalties:
4029
sampling_tensors = self.model.sampler._sampling_tensors
4030
if sampling_tensors.prompt_tokens.numel() > 0:
4031
# Cache the prompt_tokens tensor that's already on HPU
0 commit comments