We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 183a709 commit 8c742a6 Copy full SHA for 8c742a6
vllm/v1/worker/gpu_model_runner.py
@@ -827,13 +827,13 @@ def _prepare_inputs(
827
blk_table_tensor = torch.zeros(
828
(num_reqs, 1),
829
dtype=torch.int32,
830
- pin_memory=self.pin_memory,
831
- device="cpu").to(self.device, non_blocking=True)
832
- slot_mapping = torch.zeros((total_num_scheduled_tokens, ),
833
- dtype=torch.int32,
834
835
- device="cpu").to(self.device,
836
- non_blocking=True)
+ device=self.device,
+ )
+ slot_mapping = torch.zeros(
+ (total_num_scheduled_tokens, ),
+ dtype=torch.int64,
837
num_common_prefix_blocks = 0
838
else:
839
blk_table = self.input_batch.block_table[kv_cache_group_id]
0 commit comments