@@ -19,7 +19,7 @@
 from vllm.tasks import SupportedTask
 from vllm.utils import make_async
 from vllm.v1.outputs import PoolerOutput, SamplerOutput
-from vllm.worker.worker_base import WorkerBase
+from vllm.v1.worker.worker_base import WorkerBase

 logger = init_logger(__name__)

@@ -30,7 +30,7 @@ class ExecutorBase(ABC):
     """Base class for all executors.

     An executor is responsible for executing the model on one device,
-    or it can be a distributed executor 
+    or it can be a distributed executor
     that can execute the model on multiple devices.
     """

@@ -83,7 +83,7 @@ def collective_rpc(self,

         Returns:
             A list containing the results from each worker.
-        
+
         Note:
             It is recommended to use this API to only pass control messages,
             and set up data-plane communication to pass data.
@@ -100,7 +100,7 @@ def determine_num_available_blocks(self) -> tuple[int, int]:

         Returns a tuple `(num_gpu_blocks, num_cpu_blocks)`, where
         `num_gpu_blocks` are blocks that are "active" on the device and can be
-        appended to. 
+        appended to.
         `num_cpu_blocks` refers to "swapped" blocks in CPU memory and cannot be
         appended to.
         """
@@ -327,7 +327,7 @@ def _run_workers(
                 run only in the remote TP workers, not the driver worker.
                 It will also be run asynchronously and return a list of futures
                 rather than blocking on the results.
-        
+
         # TODO: simplify and merge with collective_rpc
         """
         raise NotImplementedError
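For orientation, a minimal usage sketch of the two interfaces touched above. It assumes `executor` is a concrete `ExecutorBase` subclass already constructed by the engine, and the `"report_status"` worker method name is hypothetical; only the return contracts quoted in the comments come from the docstrings in the diff.

# Minimal sketch, not engine code: `executor` is assumed to be an already
# initialized ExecutorBase subclass; "report_status" is a hypothetical
# worker method used purely for illustration.

# Per the docstring: returns (num_gpu_blocks, num_cpu_blocks), where GPU
# blocks are "active" on the device and CPU blocks are "swapped" blocks in
# CPU memory that cannot be appended to.
num_gpu_blocks, num_cpu_blocks = executor.determine_num_available_blocks()

# collective_rpc invokes the named method on every worker and returns a
# list with one result per worker. The docstring recommends using it only
# for control messages, with bulk data moved over a data-plane channel.
statuses = executor.collective_rpc("report_status")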