Commit 4a008c4

[Misc] Clean up unused imports from vllm (vllm-project#2049)

Clean up unused imports from vllm to make the code clearer.

- vLLM version: v0.10.0
- vLLM main: vllm-project/vllm@18cc33d

Signed-off-by: wangxiyuan <[email protected]>
1 parent 34cfdf5 commit 4a008c4

4 files changed (+5, −14 lines)


vllm_ascend/attention/attention_v1.py

Lines changed: 1 addition & 1 deletion
@@ -27,11 +27,11 @@
 from vllm.forward_context import ForwardContext, get_forward_context
 from vllm.utils import direct_register_custom_op
 from vllm.v1.core.sched.output import SchedulerOutput
-from vllm.v1.worker.gpu_input_batch import InputBatch

 from vllm_ascend.ops.attention import vanilla_chunked_prefill
 from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_NZ, aligned_16, is_310p,
                                nd_to_nz_2d, nd_to_nz_spec)
+from vllm_ascend.worker.npu_input_batch import InputBatch


 class AscendAttentionBackend(AttentionBackend):

vllm_ascend/attention/attention_v1_torchair.py

Lines changed: 1 addition & 1 deletion
@@ -25,11 +25,11 @@
                                               AttentionLayer, AttentionType)
 from vllm.attention.backends.utils import PAD_SLOT_ID, CommonAttentionState
 from vllm.v1.core.sched.output import SchedulerOutput
-from vllm.v1.worker.gpu_input_batch import InputBatch

 from vllm_ascend.attention.attention_v1 import AscendAttentionState
 from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_NZ, aligned_16, is_310p,
                                nd_to_nz_2d)
+from vllm_ascend.worker.npu_input_batch import InputBatch


 class AscendAttentionTorchairBackend(AttentionBackend):
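Both attention files make the same one-line swap: InputBatch now resolves from vllm-ascend's own worker package rather than vLLM's GPU-specific module. A quick smoke test for the new path, assuming a vllm-ascend installation (the assertion is illustrative, not part of the commit):

from vllm_ascend.worker.npu_input_batch import InputBatch

# The class should now come from vllm-ascend's worker package,
# not vLLM's GPU worker module.
assert InputBatch.__module__ == "vllm_ascend.worker.npu_input_batch"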

vllm_ascend/device_allocator/camem.py

Lines changed: 3 additions & 2 deletions
@@ -24,7 +24,8 @@
 import torch
 from acl.rt import memcpy  # type: ignore # noqa: F401
 from vllm.logger import logger
-from vllm.utils import is_pin_memory_available
+
+from vllm_ascend.platform import NPUPlatform


 def find_loaded_library(lib_name) -> Optional[str]:
@@ -199,7 +200,7 @@ def sleep(
             size_in_bytes,
             dtype=torch.uint8,
             device='cpu',
-            pin_memory=is_pin_memory_available())
+            pin_memory=NPUPlatform.is_pin_memory_available())
         cpu_ptr = cpu_backup_tensor.data_ptr()
         ACL_MEMCPY_DEVICE_TO_HOST = 2
         dest_max = cpu_ptr + size_in_bytes * 2
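The second camem.py hunk routes the pin-memory check through the platform class instead of vllm.utils. A minimal sketch of the staging-buffer allocation it configures, assuming torch and a vllm-ascend installation; allocate_cpu_backup is a hypothetical helper, while the torch.empty arguments mirror the hunk above:

import torch

from vllm_ascend.platform import NPUPlatform


def allocate_cpu_backup(size_in_bytes: int) -> torch.Tensor:
    # Stage device memory in a host-side buffer during sleep; pinning
    # the buffer (when the platform reports support) speeds up the
    # device-to-host memcpy that follows.
    return torch.empty(size_in_bytes,
                       dtype=torch.uint8,
                       device='cpu',
                       pin_memory=NPUPlatform.is_pin_memory_available())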

vllm_ascend/worker/model_runner_v1.py

Lines changed: 0 additions & 10 deletions
@@ -44,23 +44,20 @@
 from vllm.distributed.parallel_state import (get_dp_group, get_pp_group,
                                              get_tp_group)
 from vllm.forward_context import get_forward_context
-from vllm.inputs import INPUT_REGISTRY
 from vllm.logger import logger
 from vllm.model_executor.layers.fused_moe import FusedMoE
 from vllm.model_executor.layers.rotary_embedding import MRotaryEmbedding
 from vllm.model_executor.model_loader import get_model
 from vllm.model_executor.models.interfaces import supports_transcription
 from vllm.model_executor.models.interfaces_base import (
     VllmModelForPooling, is_pooling_model, is_text_generation_model)
-from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.inputs import MultiModalKwargs, PlaceholderRange
 from vllm.multimodal.utils import group_mm_inputs_by_modality
 from vllm.pooling_params import PoolingParams
 from vllm.sampling_params import SamplingType
 from vllm.sequence import IntermediateTensors
 from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, DeviceMemoryProfiler,
                         LazyLoader, cdiv)
-from vllm.v1.core.encoder_cache_manager import compute_encoder_budget
 from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig,
                                         KVCacheSpec)
 from vllm.v1.outputs import (EMPTY_MODEL_RUNNER_OUTPUT, LogprobsTensors,
@@ -169,13 +166,6 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device):
         self.device = device
         self.dtype = self.model_config.dtype
         self.sampler = Sampler()
-        # Multi-modal data support
-        self.input_registry = INPUT_REGISTRY
-        self.mm_registry = MULTIMODAL_REGISTRY
-        self.max_num_encoder_input_tokens, self.encoder_cache_size = compute_encoder_budget(
-            model_config=self.model_config,
-            scheduler_config=self.scheduler_config,
-            mm_registry=self.mm_registry)

         # Lazy initialization, these will be set after __init__
         self.kv_caches: List[torch.Tensor] = []