We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent f6d0f6a, commit 65acff2. Copy full SHA for 65acff2
vllm/v1/worker/hpu_model_runner.py
@@ -573,10 +573,11 @@ def __init__(
573
self.input_batch = InputBatch(
574
max_num_reqs=self.scheduler_config.max_num_seqs,
575
max_model_len=self.max_model_len,
576
- max_num_blocks_per_req=self.max_num_blocks_per_req,
+ max_num_batched_tokens=self.max_num_tokens,
577
device=self.device,
578
pin_memory=self.pin_memory,
579
vocab_size=self.model_config.get_vocab_size(),
580
+ block_sizes=[self.block_size]
581
)
582
self.mem_margin = None
583
0 commit comments