We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent b38c808 · commit e684eb5 (Copy full SHA for e684eb5)
vllm/worker/hpu_model_runner.py
@@ -4024,7 +4024,8 @@ def try_revert_dummy_output_tokens():
4024
if self.do_mark_step:
4025
htorch.core.mark_step()
4026
if hasattr(self.model.sampler, '_sampling_tensors') and \
4027
- self.model.sampler._sampling_tensors is not None:
+ self.model.sampler._sampling_tensors is not None and \
4028
+ self.model.sampler._do_penalties:
4029
sampling_tensors = self.model.sampler._sampling_tensors
4030
if sampling_tensors.prompt_tokens.numel() > 0:
4031
# Cache the prompt_tokens tensor that's already on HPU
0 commit comments