Refactor attention metadata build

weiguihua2 · weiguihua2 · commit 2fbf6d9ebad0 · 2025-08-18T17:22:07.000+08:00
Signed-off-by: weiguihua2 <weiguihua2@huawei.com>
diff --git a/tests/ut/attention/test_mla_v1.py b/tests/ut/attention/test_mla_v1.py
@@ -333,16 +333,6 @@ def test_build_dummy(self, mock_ascend_config):
         ascend_config = MagicMock()
         mock_ascend_config.return_value = ascend_config
         ascend_config.torchair_graph_config.enabled = False
-        # runner = MagicMock()
-        # runner.model_config = MagicMock()
-        # runner.device = "cpu"
-        # runner.graph_block_tables = torch.zeros((8, 64), dtype=torch.int32)
-        # runner.model_config.get_head_size.return_value = 64
-        # runner.chunked_prefill_enabled = False
-        # runner.attn_mask = torch.zeros((1, 1), dtype=torch.bool)
-        # runner.spec_attn_mask = torch.zeros((1, 1), dtype=torch.bool)
-        # runner.dtype = torch.float16
-        # runner.decode_token_per_req = 1
 
         mock_vllm_config = MagicMock()
         mock_vllm_config.model_config.max_model_len = 1024
diff --git a/vllm_ascend/attention/attention_v1_torchair.py b/vllm_ascend/attention/attention_v1_torchair.py
@@ -259,9 +259,6 @@ def build(
         query_start_loc = query_start_loc_cpu.to(self.device,
                                                  non_blocking=True)
         query_lens = query_start_loc_cpu[1:] - query_start_loc_cpu[:-1]
-        # input_positions = common_attn_metadata.positions_cpu[:num_actual_tokens].to(
-        #     device, non_blocking=True).long()
-
         input_positions = common_attn_metadata.positions[:
                                                          num_actual_tokens].long(
                                                          )