Refactor model runner

weiguihua2 · weiguihua2 · commit 38c5dea4e2fd · 2025-08-19T15:50:50.000+08:00
Signed-off-by: weiguihua2 <weiguihua2@huawei.com>
diff --git a/vllm_ascend/torchair/torchair_attention.py b/vllm_ascend/torchair/torchair_attention.py
@@ -25,6 +25,7 @@
 from vllm.attention.backends.abstract import (AttentionImpl, AttentionLayer,
                                               AttentionType)
 from vllm.attention.backends.utils import PAD_SLOT_ID
+from vllm.config import VllmConfig
 from vllm.utils import cdiv
 
 from vllm_ascend.attention.attention_v1 import (AscendAttentionBackend,
@@ -97,8 +98,10 @@ class AscendTorchairMetadata(AscendMetadata):
 
 class AscendAttentionTorchairMetadataBuilder(AscendAttentionMetadataBuilder):
 
-    def __init__(self, runner):
-        super().__init__(runner)
+    def __init__(self,
+        vllm_config: VllmConfig,
+        device: torch.device,):
+        super().__init__(vllm_config, device)
         self.max_num_blocks_per_req = cdiv(
             self.model_config.max_model_len,
             self.vllm_config.cache_config.block_size)