Commit 4a008c4

[Misc] Clean up unused imports from vllm (vllm-project#2049)

Clean up unused imports from vllm to make the code clearer.

- vLLM version: v0.10.0
- vLLM main: vllm-project/vllm@18cc33d

Signed-off-by: wangxiyuan <[email protected]>
1 parent 34cfdf5 commit 4a008c4

4 files changed (+5, −14 lines)


vllm_ascend/attention/attention_v1.py

Lines changed: 1 addition & 1 deletion
@@ -27,11 +27,11 @@
 from vllm.forward_context import ForwardContext, get_forward_context
 from vllm.utils import direct_register_custom_op
 from vllm.v1.core.sched.output import SchedulerOutput
-from vllm.v1.worker.gpu_input_batch import InputBatch

 from vllm_ascend.ops.attention import vanilla_chunked_prefill
 from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_NZ, aligned_16, is_310p,
                                nd_to_nz_2d, nd_to_nz_spec)
+from vllm_ascend.worker.npu_input_batch import InputBatch


 class AscendAttentionBackend(AttentionBackend):

vllm_ascend/attention/attention_v1_torchair.py

Lines changed: 1 addition & 1 deletion
@@ -25,11 +25,11 @@
                                               AttentionLayer, AttentionType)
 from vllm.attention.backends.utils import PAD_SLOT_ID, CommonAttentionState
 from vllm.v1.core.sched.output import SchedulerOutput
-from vllm.v1.worker.gpu_input_batch import InputBatch

 from vllm_ascend.attention.attention_v1 import AscendAttentionState
 from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_NZ, aligned_16, is_310p,
                                nd_to_nz_2d)
+from vllm_ascend.worker.npu_input_batch import InputBatch


 class AscendAttentionTorchairBackend(AttentionBackend):
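Both attention files make the same one-line swap: InputBatch now resolves from vllm-ascend's own worker package rather than vLLM's GPU-specific module. A quick smoke test for the new path, assuming a vllm-ascend installation (the assertion is illustrative, not part of the commit):

from vllm_ascend.worker.npu_input_batch import InputBatch

# The class should now come from vllm-ascend's worker package,
# not vLLM's GPU worker module.
assert InputBatch.__module__ == "vllm_ascend.worker.npu_input_batch"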

vllm_ascend/device_allocator/camem.py

Lines changed: 3 additions & 2 deletions
@@ -24,7 +24,8 @@
 import torch
 from acl.rt import memcpy  # type: ignore # noqa: F401
 from vllm.logger import logger
-from vllm.utils import is_pin_memory_available
+
+from vllm_ascend.platform import NPUPlatform


 def find_loaded_library(lib_name) -> Optional[str]:
@@ -199,7 +200,7 @@ def sleep(
             size_in_bytes,
             dtype=torch.uint8,
             device='cpu',
-            pin_memory=is_pin_memory_available())
+            pin_memory=NPUPlatform.is_pin_memory_available())
         cpu_ptr = cpu_backup_tensor.data_ptr()
         ACL_MEMCPY_DEVICE_TO_HOST = 2
         dest_max = cpu_ptr + size_in_bytes * 2
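The second camem.py hunk routes the pin-memory check through the platform class instead of vllm.utils. A minimal sketch of the staging-buffer allocation it configures, assuming torch and a vllm-ascend installation; allocate_cpu_backup is a hypothetical helper, while the torch.empty arguments mirror the hunk above:

import torch

from vllm_ascend.platform import NPUPlatform


def allocate_cpu_backup(size_in_bytes: int) -> torch.Tensor:
    # Stage device memory in a host-side buffer during sleep; pinning
    # the buffer (when the platform reports support) speeds up the
    # device-to-host memcpy that follows.
    return torch.empty(size_in_bytes,
                       dtype=torch.uint8,
                       device='cpu',
                       pin_memory=NPUPlatform.is_pin_memory_available())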

vllm_ascend/worker/model_runner_v1.py

Lines changed: 0 additions & 10 deletions
@@ -44,23 +44,20 @@
 from vllm.distributed.parallel_state import (get_dp_group, get_pp_group,
                                              get_tp_group)
 from vllm.forward_context import get_forward_context
-from vllm.inputs import INPUT_REGISTRY
 from vllm.logger import logger
 from vllm.model_executor.layers.fused_moe import FusedMoE
 from vllm.model_executor.layers.rotary_embedding import MRotaryEmbedding
 from vllm.model_executor.model_loader import get_model
 from vllm.model_executor.models.interfaces import supports_transcription
 from vllm.model_executor.models.interfaces_base import (
     VllmModelForPooling, is_pooling_model, is_text_generation_model)
-from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.inputs import MultiModalKwargs, PlaceholderRange
 from vllm.multimodal.utils import group_mm_inputs_by_modality
 from vllm.pooling_params import PoolingParams
 from vllm.sampling_params import SamplingType
 from vllm.sequence import IntermediateTensors
 from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, DeviceMemoryProfiler,
                         LazyLoader, cdiv)
-from vllm.v1.core.encoder_cache_manager import compute_encoder_budget
 from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig,
                                         KVCacheSpec)
 from vllm.v1.outputs import (EMPTY_MODEL_RUNNER_OUTPUT, LogprobsTensors,
@@ -169,13 +166,6 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device):
         self.device = device
         self.dtype = self.model_config.dtype
         self.sampler = Sampler()
-        # Multi-modal data support
-        self.input_registry = INPUT_REGISTRY
-        self.mm_registry = MULTIMODAL_REGISTRY
-        self.max_num_encoder_input_tokens, self.encoder_cache_size = compute_encoder_budget(
-            model_config=self.model_config,
-            scheduler_config=self.scheduler_config,
-            mm_registry=self.mm_registry)

         # Lazy initialization, these will be set after __init__
         self.kv_caches: List[torch.Tensor] = []