5 files changed, +8 −10 lines changed

File 1 of 5:
 import torch
 import torch_npu
-from vllm.config import VllmConfig
 from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
                                               AttentionLayer, AttentionType)
 from vllm.attention.backends.utils import CommonAttentionState
-from vllm.config import get_current_vllm_config
+from vllm.config import VllmConfig, get_current_vllm_config
 from vllm.forward_context import ForwardContext, get_forward_context
 from vllm.utils import direct_register_custom_op
 from vllm.v1.core.sched.output import SchedulerOutput
 from vllm.v1.worker.gpu_input_batch import InputBatch

+from vllm_ascend.attention.utils import AscendCommonAttentionMetadata
 from vllm_ascend.multistream.base import MSAttentionMetadataSplitConfig
 from vllm_ascend.ops.attention import vanilla_chunked_prefill
 from vllm_ascend.utils import get_graph_params
-from vllm_ascend.attention.utils import AscendCommonAttentionMetadata


 class AscendAttentionBackend(AttentionBackend):
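This hunk only consolidates and reorders imports: the two separate from vllm.config statements are merged into one, and the AscendCommonAttentionMetadata import moves into its alphabetical slot in the vllm_ascend group. As a hedged illustration of why the merge is behavior-preserving, here is a minimal sketch of the usual vllm pattern in which both imported names appear side by side (the class and attribute access below are assumptions for illustration, not code from this PR):

from vllm.config import VllmConfig, get_current_vllm_config


class ExampleAttentionImpl:  # hypothetical class, not from this PR
    def __init__(self) -> None:
        # get_current_vllm_config() returns the VllmConfig that vllm
        # installs while the model is being constructed, so the two
        # names are naturally used together and belong in one import.
        vllm_config: VllmConfig = get_current_vllm_config()
        self.block_size = vllm_config.cache_config.block_size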
File 2 of 5:

 import numpy as np
 import torch
 import torch_npu
-from vllm_ascend import envs
 from vllm.attention.backends.abstract import (AttentionBackend, AttentionLayer,
                                               AttentionMetadata,
                                               MLAAttentionImpl)
 from vllm.attention.backends.utils import PAD_SLOT_ID
-from vllm.config import get_current_vllm_config, VllmConfig
+from vllm.config import VllmConfig, get_current_vllm_config
 from vllm.model_executor.layers.linear import (LinearBase,
                                                UnquantizedLinearMethod)
 from vllm.utils import cdiv, round_down

-from vllm_ascend.attention.utils import (AscendCommonAttentionMetadata,split_decodes_and_prefills)
-
+from vllm_ascend import envs
 from vllm_ascend.ascend_config import get_ascend_config
 from vllm_ascend.attention.attention_v1 import AscendAttentionState
+from vllm_ascend.attention.utils import (AscendCommonAttentionMetadata,
+                                         split_decodes_and_prefills)
 from vllm_ascend.multistream.base import MSAttentionMetadataSplitConfig
 from vllm_ascend.multistream.context import get_multistream_comm_context
 from vllm_ascend.multistream.ms_split import model_input_split_v1_mla_attn
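Besides fixing ordering, the rewritten import wraps the long (AscendCommonAttentionMetadata, split_decodes_and_prefills) tuple across two lines in the parenthesized continuation style used elsewhere in the file. The signature of split_decodes_and_prefills is not shown in this diff; the sketch below assumes it mirrors the upstream vllm helper of the same name, which partitions a batch into decode and prefill requests, so the argument and return names are assumptions:

from vllm_ascend.attention.utils import (AscendCommonAttentionMetadata,
                                         split_decodes_and_prefills)


def plan_batch(metadata: AscendCommonAttentionMetadata) -> None:
    # Hypothetical call shape, mirroring the upstream vllm helper of the
    # same name; the vllm_ascend signature may differ.
    num_decodes, num_prefills, num_decode_tokens, num_prefill_tokens = \
        split_decodes_and_prefills(metadata)
    print(num_decodes, num_prefills, num_decode_tokens, num_prefill_tokens)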
File 3 of 5:

 from dataclasses import dataclass
-
 from typing import Any, Optional

 import torch
77
from vllm_ascend .ascend_forward_context import set_ascend_forward_context
78
78
from vllm_ascend .attention .attention import AttentionMaskBuilder
79
79
from vllm_ascend .attention .attention_v1 import AscendAttentionState
80
+ from vllm_ascend .attention .utils import AscendCommonAttentionMetadata
80
81
from vllm_ascend .distributed .utils import is_lmhead_tp
81
82
from vllm_ascend .eplb .adaptor .vllm_adaptor import VllmEplbAdaptor
82
83
from vllm_ascend .eplb .eplb_updator import EplbUpdator
87
88
check_torchair_cache_exist ,
88
89
write_kv_cache_bytes_to_file )
89
90
from vllm_ascend .worker .mtp_proposer_v1 import MtpProposer
90
- from vllm_ascend .attention .utils import AscendCommonAttentionMetadata
91
91
92
92
if TYPE_CHECKING :
93
93
import xgrammar as xgr # type: ignore[import-untyped]
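The if TYPE_CHECKING: guard visible at the end of this hunk is the standard idiom for type-only imports: xgrammar is available to the type checker without becoming a hard runtime dependency. A minimal self-contained sketch of the pattern (the function below is hypothetical, for illustration only):

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Evaluated only by static type checkers, never at runtime, so
    # xgrammar stays an optional dependency.
    import xgrammar as xgr  # type: ignore[import-untyped]


def fill_bitmask(matcher: "xgr.GrammarMatcher") -> None:
    # Hypothetical function. The annotation must be a string (or be
    # deferred via `from __future__ import annotations`) because the
    # xgr name does not exist at runtime.
    ...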
File 5 of 5:

 from vllm_ascend.ascend_config import get_ascend_config
 from vllm_ascend.ascend_forward_context import set_ascend_forward_context
+from vllm_ascend.attention.utils import AscendCommonAttentionMetadata
 from vllm_ascend.distributed.utils import is_lmhead_tp
 from vllm_ascend.models.deepseek_mtp import CustomDeepSeekMTP
 from vllm_ascend.utils import ProfileExecuteDuration


 # FIXME(woosuk): The logic here is duplicated with the main sampling code.
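Every hunk in this PR is the kind of reordering an import sorter produces: stdlib, third-party, and first-party vllm_ascend blocks, alphabetized within each group. Assuming the project uses isort (a guess; the tool is not named anywhere in this diff, and the file paths below are likewise guesses based on the modules imported above), the result can be verified with isort's Python API:

import isort

# Paths are assumptions inferred from the modules these diffs import.
for path in (
    "vllm_ascend/attention/attention_v1.py",
    "vllm_ascend/attention/mla_v1.py",
    "vllm_ascend/attention/utils.py",
):
    # isort.check_file returns True when the file's imports are already
    # sorted; show_diff prints the reordering it would otherwise apply.
    if not isort.check_file(path, show_diff=True):
        print(f"{path} needs sorting")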