We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 8fa324d · commit 80dfd67 (Copy full SHA for 80dfd67)
tensorrt_llm/_torch/attention_backend/trtllm.py
@@ -904,13 +904,6 @@ def prepare_flash_mla(self) -> None:
904
self.block_ids_per_seq[:self.num_generations, :num_blocks].copy_(
905
block_ids_per_seq[self.num_contexts:], non_blocking=True)
906
907
- self.kv_lens_cuda_runtime = self.kv_lens_cuda[:self.num_seqs]
908
- self.kv_lens_runtime = self.kv_lens[:self.num_seqs]
909
- self.prompt_lens_cuda_runtime = self.prompt_lens_cuda[:self.num_seqs]
910
- self.prompt_lens_cpu_runtime = self.prompt_lens_cpu[:self.num_seqs]
911
- self.host_request_types_runtime = self.host_request_types[:self.
912
- num_seqs]
913
-
914
def pre_process_for_chunked_prefill(
915
self,
916
chunked_seq_len: torch.Tensor,
0 commit comments