Skip to content

Commit 9e2ddee

Browse files
committed
ci fix
1 parent 665c4a7 commit 9e2ddee

File tree

5 files changed

+8
-10
lines changed

5 files changed

+8
-10
lines changed

vllm_ascend/attention/attention_v1.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,20 +21,19 @@
2121

2222
import torch
2323
import torch_npu
24-
from vllm.config import VllmConfig
2524
from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
2625
AttentionLayer, AttentionType)
2726
from vllm.attention.backends.utils import CommonAttentionState
28-
from vllm.config import get_current_vllm_config
27+
from vllm.config import VllmConfig, get_current_vllm_config
2928
from vllm.forward_context import ForwardContext, get_forward_context
3029
from vllm.utils import direct_register_custom_op
3130
from vllm.v1.core.sched.output import SchedulerOutput
3231
from vllm.v1.worker.gpu_input_batch import InputBatch
3332

33+
from vllm_ascend.attention.utils import AscendCommonAttentionMetadata
3434
from vllm_ascend.multistream.base import MSAttentionMetadataSplitConfig
3535
from vllm_ascend.ops.attention import vanilla_chunked_prefill
3636
from vllm_ascend.utils import get_graph_params
37-
from vllm_ascend.attention.utils import AscendCommonAttentionMetadata
3837

3938

4039
class AscendAttentionBackend(AttentionBackend):

vllm_ascend/attention/mla_v1.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,20 @@
44
import numpy as np
55
import torch
66
import torch_npu
7-
from vllm_ascend import envs
87
from vllm.attention.backends.abstract import (AttentionBackend, AttentionLayer,
98
AttentionMetadata,
109
MLAAttentionImpl)
1110
from vllm.attention.backends.utils import PAD_SLOT_ID
12-
from vllm.config import get_current_vllm_config, VllmConfig
11+
from vllm.config import VllmConfig, get_current_vllm_config
1312
from vllm.model_executor.layers.linear import (LinearBase,
1413
UnquantizedLinearMethod)
1514
from vllm.utils import cdiv, round_down
1615

17-
from vllm_ascend.attention.utils import (AscendCommonAttentionMetadata,split_decodes_and_prefills)
18-
16+
from vllm_ascend import envs
1917
from vllm_ascend.ascend_config import get_ascend_config
2018
from vllm_ascend.attention.attention_v1 import AscendAttentionState
19+
from vllm_ascend.attention.utils import (AscendCommonAttentionMetadata,
20+
split_decodes_and_prefills)
2121
from vllm_ascend.multistream.base import MSAttentionMetadataSplitConfig
2222
from vllm_ascend.multistream.context import get_multistream_comm_context
2323
from vllm_ascend.multistream.ms_split import model_input_split_v1_mla_attn

vllm_ascend/attention/utils.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
from dataclasses import dataclass
2-
32
from typing import Any, Optional
43

54
import torch

vllm_ascend/worker/model_runner_v1.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
from vllm_ascend.ascend_forward_context import set_ascend_forward_context
7878
from vllm_ascend.attention.attention import AttentionMaskBuilder
7979
from vllm_ascend.attention.attention_v1 import AscendAttentionState
80+
from vllm_ascend.attention.utils import AscendCommonAttentionMetadata
8081
from vllm_ascend.distributed.utils import is_lmhead_tp
8182
from vllm_ascend.eplb.adaptor.vllm_adaptor import VllmEplbAdaptor
8283
from vllm_ascend.eplb.eplb_updator import EplbUpdator
@@ -87,7 +88,6 @@
8788
check_torchair_cache_exist,
8889
write_kv_cache_bytes_to_file)
8990
from vllm_ascend.worker.mtp_proposer_v1 import MtpProposer
90-
from vllm_ascend.attention.utils import AscendCommonAttentionMetadata
9191

9292
if TYPE_CHECKING:
9393
import xgrammar as xgr # type: ignore[import-untyped]

vllm_ascend/worker/mtp_proposer_v1.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@
1616

1717
from vllm_ascend.ascend_config import get_ascend_config
1818
from vllm_ascend.ascend_forward_context import set_ascend_forward_context
19+
from vllm_ascend.attention.utils import AscendCommonAttentionMetadata
1920
from vllm_ascend.distributed.utils import is_lmhead_tp
2021
from vllm_ascend.models.deepseek_mtp import CustomDeepSeekMTP
2122
from vllm_ascend.utils import ProfileExecuteDuration
22-
from vllm_ascend.attention.utils import AscendCommonAttentionMetadata
2323

2424

2525
# FIXME(woosuk): The logic here is duplicated with the main sampling code.

0 commit comments

Comments (0)