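Resolve review comments: size sparse MLA indptr buffers per token (max_num_batched_tokens) instead of per request (max_num_seqs)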

ganyi1996ppo · ganyi1996ppo · commit ee0a366fbd50 · 2026-01-21T02:22:24.000Z
Signed-off-by: ganyi <ygan@amd.com>
diff --git a/vllm/v1/attention/backends/mla/rocm_aiter_mla_sparse.py b/vllm/v1/attention/backends/mla/rocm_aiter_mla_sparse.py
@@ -157,7 +157,6 @@ def __init__(
         parallel_config = vllm_config.parallel_config
         self.device = device
         max_num_batched_tokens = vllm_config.scheduler_config.max_num_batched_tokens
-        max_num_seqs = vllm_config.scheduler_config.max_num_seqs
 
         self.num_heads = self.model_config.get_num_attention_heads(parallel_config)
         self.mla_dims = get_mla_dims(self.model_config)
@@ -179,10 +178,10 @@ def __init__(
             device=device,
         )
         self.qo_indptr = torch.arange(
-            0, max_num_seqs + 1, dtype=torch.int32, device=device
+            0, max_num_batched_tokens + 1, dtype=torch.int32, device=device
         )
         self.paged_kv_last_page_len = torch.ones(
-            max_num_seqs, dtype=torch.int32, device=device
+            max_num_batched_tokens, dtype=torch.int32, device=device
         )
 
         # These two needs to be calculated in runtime,
@@ -193,7 +192,7 @@ def __init__(
             device=device,
         )
         self.paged_kv_indptr = torch.zeros(
-            [max_num_seqs + 1], dtype=torch.int32, device=device
+            [max_num_batched_tokens + 1], dtype=torch.int32, device=device
         )
 
     def build(
@@ -203,7 +202,6 @@ def build(
         fast_build: bool = False,
     ) -> ROCMAiterMLASparseMetadata:
         num_tokens = common_attn_metadata.num_actual_tokens
-        num_reqs = common_attn_metadata.num_reqs
         starts = np.asarray(common_attn_metadata.query_start_loc_cpu, dtype=np.int32)
         seg_lengths = np.diff(starts)
         req_id_per_token = np.repeat(
@@ -218,11 +216,11 @@ def build(
         self.paged_kv_indptr.fill_(0)
 
         req_id_per_token = self.req_id_per_token_buffer[:num_tokens]
-        qo_indptr = self.qo_indptr[: num_reqs + 1]
-        paged_kv_last_page_len = self.paged_kv_last_page_len[:num_reqs]
+        qo_indptr = self.qo_indptr[: num_tokens + 1]
+        paged_kv_last_page_len = self.paged_kv_last_page_len[:num_tokens]
         paged_kv_indices = self.paged_kv_indices[: num_tokens * self.topk_tokens]
-        paged_kv_indptr = self.paged_kv_indptr[: num_reqs + 1]
-        paged_kv_indptr_rest = self.paged_kv_indptr[num_reqs + 1 :]
+        paged_kv_indptr = self.paged_kv_indptr[: num_tokens + 1]
+        paged_kv_indptr_rest = self.paged_kv_indptr[num_tokens + 1 :]
 
         metadata = ROCMAiterMLASparseMetadata(
             num_reqs=common_attn_metadata.num_reqs,