Commit 6a113d9

[V0 Deprecation] Remove vllm.worker and update according imports (vllm-project#25901)
1 parent 2e4fe48 commit 6a113d9

11 files changed: +276 -327 lines changed


tests/model_executor/model_loader/tensorizer_loader/conftest.py

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@
 from vllm.model_executor.model_loader.tensorizer import TensorizerConfig
 from vllm.utils import get_distributed_init_method, get_ip, get_open_port
 from vllm.v1.executor.abstract import UniProcExecutor
-from vllm.worker.worker_base import WorkerWrapperBase
+from vllm.v1.worker.worker_base import WorkerWrapperBase
 
 MODEL_REF = "facebook/opt-125m"
 
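This one-line import swap, `vllm.worker.worker_base` → `vllm.v1.worker.worker_base`, recurs across every file in this commit. For downstream code still importing from the removed V0 module, the migration looks like this (class names taken from the diffs):

```python
# Before this commit (V0 module, now removed):
#   from vllm.worker.worker_base import WorkerBase, WorkerWrapperBase

# After this commit (V1 location):
from vllm.v1.worker.worker_base import WorkerBase, WorkerWrapperBase
```
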
tools/pre_commit/check_pickle_imports.py

Lines changed: 0 additions & 1 deletion
@@ -36,7 +36,6 @@
     'benchmarks/cutlass_benchmarks/w8a8_benchmarks.py',
     'benchmarks/cutlass_benchmarks/sparse_benchmarks.py',
     # cloudpickle
-    'vllm/worker/worker_base.py',
     'vllm/executor/mp_distributed_executor.py',
     'vllm/executor/ray_distributed_executor.py',
     'vllm/entrypoints/llm.py',
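`check_pickle_imports.py` maintains an allowlist of files that may import pickle/cloudpickle, so the deleted `vllm/worker/worker_base.py` must leave the list too. A minimal sketch of how such a pre-commit check can work (hypothetical code, not the actual tool):

```python
import sys

# Hypothetical allowlist in the spirit of check_pickle_imports.py; files
# listed here may import pickle or cloudpickle, everything else fails.
ALLOWED_PICKLE_FILES = {
    'vllm/executor/mp_distributed_executor.py',
    'vllm/executor/ray_distributed_executor.py',
    'vllm/entrypoints/llm.py',
}

def file_passes(path: str) -> bool:
    """Return True if `path` either avoids pickle or is allowlisted."""
    with open(path, encoding='utf-8') as f:
        source = f.read()
    uses_pickle = 'import pickle' in source or 'import cloudpickle' in source
    return not uses_pickle or path in ALLOWED_PICKLE_FILES

if __name__ == '__main__':
    failures = [p for p in sys.argv[1:] if not file_passes(p)]
    for path in failures:
        print(f'{path}: pickle import outside the allowlist')
    sys.exit(1 if failures else 0)
```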

vllm/executor/executor_base.py

Lines changed: 5 additions & 5 deletions
Note: besides the import swap in the first hunk, the remaining four changes in this file only strip trailing whitespace (the removed and added lines are otherwise identical).

@@ -19,7 +19,7 @@
 from vllm.tasks import SupportedTask
 from vllm.utils import make_async
 from vllm.v1.outputs import PoolerOutput, SamplerOutput
-from vllm.worker.worker_base import WorkerBase
+from vllm.v1.worker.worker_base import WorkerBase
 
 logger = init_logger(__name__)
 
@@ -30,7 +30,7 @@ class ExecutorBase(ABC):
     """Base class for all executors.
 
     An executor is responsible for executing the model on one device,
-    or it can be a distributed executor 
+    or it can be a distributed executor
     that can execute the model on multiple devices.
     """
 
@@ -83,7 +83,7 @@ def collective_rpc(self,
 
         Returns:
             A list containing the results from each worker.
-        
+
         Note:
             It is recommended to use this API to only pass control messages,
            and set up data-plane communication to pass data.
@@ -100,7 +100,7 @@ def determine_num_available_blocks(self) -> tuple[int, int]:
 
        Returns a tuple `(num_gpu_blocks, num_cpu_blocks)`, where
        `num_gpu_blocks` are blocks that are "active" on the device and can be
-        appended to. 
+        appended to.
        `num_cpu_blocks` refers to "swapped" blocks in CPU memory and cannot be
        appended to.
        """
@@ -327,7 +327,7 @@ def _run_workers(
        run only in the remote TP workers, not the driver worker.
        It will also be run asynchronously and return a list of futures
        rather than blocking on the results.
-        
+
        # TODO: simplify and merge with collective_rpc
        """
        raise NotImplementedError
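The `collective_rpc` docstring above recommends reserving it for control-plane messages. A hedged usage sketch (the executor type is from this file; `echo_rank` is a hypothetical worker method used only for illustration):

```python
from vllm.executor.executor_base import ExecutorBase

def ping_workers(executor: ExecutorBase) -> list:
    # Broadcast a small control message to every worker and gather one
    # result per worker, as the docstring describes. `echo_rank` is a
    # hypothetical worker method; real data should travel over a separate
    # data-plane channel, not through this RPC.
    return executor.collective_rpc("echo_rank")
```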

vllm/executor/ray_utils.py

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@
 from vllm.platforms import current_platform
 from vllm.sequence import ExecuteModelRequest, IntermediateTensors
 from vllm.utils import get_ip
-from vllm.worker.worker_base import WorkerWrapperBase
+from vllm.v1.worker.worker_base import WorkerWrapperBase
 
 if TYPE_CHECKING:
     from vllm.v1.core.sched.output import SchedulerOutput

vllm/executor/uniproc_executor.py

Lines changed: 5 additions & 5 deletions
Note: the second hunk only strips trailing whitespace from four docstring lines.

@@ -19,7 +19,7 @@
 from vllm.v1.engine import ReconfigureDistributedRequest, ReconfigureRankType
 from vllm.v1.executor.utils import get_and_update_mm_cache
 from vllm.v1.outputs import AsyncModelRunnerOutput
-from vllm.worker.worker_base import WorkerWrapperBase
+from vllm.v1.worker.worker_base import WorkerWrapperBase
 
 logger = init_logger(__name__)
 
@@ -160,10 +160,10 @@ def determine_num_available_blocks(self) -> Tuple[int, int]:
         """
         Determine the number of available KV blocks.
         Add an additional all_reduce to get the min across all ranks.
-        Note that even if we have the same `gpu_memory_utilization` and 
-        `swap_space`, the available memory in every rank might still 
-        differ because NCCL can take different amounts of memory in 
-        different ranks. Therefore, it is necessary to test if all ranks 
+        Note that even if we have the same `gpu_memory_utilization` and
+        `swap_space`, the available memory in every rank might still
+        differ because NCCL can take different amounts of memory in
+        different ranks. Therefore, it is necessary to test if all ranks
         agree on the same KV cache configuration.
         """
         a, b = super().determine_num_available_blocks()
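The docstring explains why the extra all_reduce is needed: ranks can compute different free-memory figures, so everyone must settle on the minimum. A minimal sketch of that reduction with `torch.distributed` (illustrative; assumes an initialized process group, not the exact vLLM code):

```python
import torch
import torch.distributed as dist

def min_blocks_across_ranks(num_gpu_blocks: int,
                            num_cpu_blocks: int) -> tuple[int, int]:
    # Each rank may see different free memory (e.g. NCCL buffers differ per
    # rank), so take the elementwise minimum; every rank then configures the
    # same KV cache size.
    counts = torch.tensor([num_gpu_blocks, num_cpu_blocks], dtype=torch.int64)
    dist.all_reduce(counts, op=dist.ReduceOp.MIN)
    return int(counts[0]), int(counts[1])
```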

vllm/platforms/cuda.py

Lines changed: 1 addition & 11 deletions
@@ -110,17 +110,7 @@ def check_and_update_config(cls, vllm_config: "VllmConfig") -> None:
         model_config = vllm_config.model_config
 
         if parallel_config.worker_cls == "auto":
-            if vllm_config.speculative_config:
-                if not envs.VLLM_USE_V1:
-                    raise NotImplementedError(
-                        "Speculative decoding is not supported on vLLM V0.")
-                parallel_config.worker_cls = "vllm.v1.worker.gpu_worker.Worker"
-            else:
-                if envs.VLLM_USE_V1:
-                    parallel_config.worker_cls = \
-                        "vllm.v1.worker.gpu_worker.Worker"
-                else:
-                    parallel_config.worker_cls = "vllm.worker.worker.Worker"
+            parallel_config.worker_cls = "vllm.v1.worker.gpu_worker.Worker"
 
         cache_config = vllm_config.cache_config
         if cache_config and cache_config.block_size is None:

vllm/platforms/rocm.py

Lines changed: 1 addition & 11 deletions
@@ -327,17 +327,7 @@ def check_and_update_config(cls, vllm_config: "VllmConfig") -> None:
             cache_config.block_size = 16
 
         if parallel_config.worker_cls == "auto":
-            if vllm_config.speculative_config:
-                if not use_v1:
-                    raise NotImplementedError(
-                        "Speculative decoding is not supported on vLLM V0.")
-                parallel_config.worker_cls = "vllm.v1.worker.gpu_worker.Worker"
-            else:
-                if use_v1:
-                    parallel_config.worker_cls = \
-                        "vllm.v1.worker.gpu_worker.Worker"
-                else:
-                    parallel_config.worker_cls = "vllm.worker.worker.Worker"
+            parallel_config.worker_cls = "vllm.v1.worker.gpu_worker.Worker"
         # Aiter rms norm perform best when CUDA Graph capture is enabled.
         if (use_v1 and use_aiter_rms_norm and not is_eager_execution
                 and "-rms_norm" not in compilation_config.custom_ops):
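With V0 removed, both `cuda.py` and `rocm.py` resolve `worker_cls == "auto"` straight to the V1 GPU worker; the V0 fallback and the speculative-decoding guard are gone. The value is a fully qualified class path that is imported dynamically later. A hedged sketch of that resolution step (vLLM has its own helper for this; plain `importlib` is shown for illustration):

```python
import importlib

def resolve_worker_cls(qualname: str) -> type:
    # Split "vllm.v1.worker.gpu_worker.Worker" into module path and class
    # name, import the module, and return the class object.
    module_name, _, class_name = qualname.rpartition(".")
    return getattr(importlib.import_module(module_name), class_name)

Worker = resolve_worker_cls("vllm.v1.worker.gpu_worker.Worker")
```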

vllm/v1/executor/multiproc_executor.py

Lines changed: 2 additions & 2 deletions
Note: the second hunk only strips trailing whitespace from one docstring line.

@@ -41,7 +41,7 @@
 from vllm.v1.executor.utils import get_and_update_mm_cache
 from vllm.v1.outputs import (AsyncModelRunnerOutput, DraftTokenIds,
                              ModelRunnerOutput)
-from vllm.worker.worker_base import WorkerWrapperBase
+from vllm.v1.worker.worker_base import WorkerWrapperBase
 
 logger = init_logger(__name__)
 
@@ -702,7 +702,7 @@ def setup_proc_title_and_log_prefix(enable_ep: bool) -> None:
 
 def set_multiprocessing_worker_envs():
     """ Set up environment variables that should be used when there are workers
-    in a multiprocessing environment. This should be called by the parent 
+    in a multiprocessing environment. This should be called by the parent
     process before worker processes are created"""
 
     _maybe_force_spawn()

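`set_multiprocessing_worker_envs` prepares the parent process before any workers exist, and the `_maybe_force_spawn()` call in the hunk's context hints at start-method handling. A hedged sketch of that kind of guard (illustrative; not vLLM's actual implementation):

```python
import multiprocessing

def maybe_force_spawn() -> None:
    # Illustrative guard in the spirit of _maybe_force_spawn(): CUDA state
    # does not survive fork(), so a parent that may already hold a GPU
    # context switches worker creation to "spawn" before forking anything.
    if multiprocessing.get_start_method(allow_none=True) != "spawn":
        multiprocessing.set_start_method("spawn", force=True)
```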