11import os
22import signal
3- from typing import Optional
3+ import types
4+ from collections .abc import Iterable
5+ from typing import Optional , Union
46
57from vllm .config import ParallelConfig , VllmConfig
68from vllm .logger import init_logger
79from vllm .transformers_utils .config import \
810 maybe_register_config_serialize_by_value
911from vllm .v1 .engine .core import DPEngineCoreProc , EngineCoreProc
12+ from vllm .v1 .outputs import ModelRunnerOutput
13+ from vllm .v1 .request import RequestStatus
1014
1115import vllm_ascend .envs as vllm_ascend_envs
1216
@@ -77,7 +81,10 @@ def run_busy_loop(self):
7781 self .execute_dummy_batch ()
7882
7983
80- def run_engine_core (* args , dp_rank : int = 0 , local_dp_rank : int = 0 , ** kwargs ):
84+ def run_engine_core_dplb (* args ,
85+ dp_rank : int = 0 ,
86+ local_dp_rank : int = 0 ,
87+ ** kwargs ):
8188 """Launch EngineCore busy loop in background process."""
8289
8390 # Signal handler used for graceful termination.
@@ -108,7 +115,115 @@ def signal_handler(signum, frame):
108115 engine_core = ExternealDPEngineCoreProc (* args , ** kwargs )
109116 else :
110117 engine_core = EngineCoreProc (* args , ** kwargs )
118+ engine_core .scheduler .finish_requests = types .MethodType (
119+ finish_requests , engine_core .scheduler )
120+ engine_core .scheduler ._update_from_kv_xfer_finished = types .MethodType (
121+ _update_from_kv_xfer_finished , engine_core .scheduler )
122+ engine_core .run_busy_loop ()
123+
124+ except SystemExit :
125+ logger .debug ("EngineCore exiting." )
126+ raise
127+ except Exception as e :
128+ if engine_core is None :
129+ logger .exception ("EngineCore failed to start." )
130+ else :
131+ logger .exception ("EngineCore encountered a fatal error." )
132+ engine_core ._send_engine_dead ()
133+ raise e
134+ finally :
135+ if engine_core is not None :
136+ engine_core .shutdown ()
137+
138+
def finish_requests(
    self,
    request_ids: Union[str, Iterable[str]],
    finished_status: RequestStatus,
) -> None:
    """Finish requests on behalf of a caller outside the scheduler.

    Typical trigger: the API server aborts a request because the client
    went away.

    Args:
        request_ids: A single request ID, or an iterable of request IDs.
        finished_status: The status to assign; it must be one of the
            finished statuses.
    """
    assert RequestStatus.is_finished(finished_status)

    # A lone string is one ID, not an iterable of characters. Any other
    # iterable is collapsed into a set so each ID is handled once.
    unique_ids = ((request_ids, )
                  if isinstance(request_ids, str) else set(request_ids))

    for request_id in unique_ids:
        req = self.requests.get(request_id)
        if req is None:
            # Unknown request ID: nothing to finish.
            continue

        # Only dequeue when the request is actually held by one of the two
        # queues; otherwise the remove() call would fail.
        if req in self.waiting or req in self.running:
            holder = (self.running
                      if req.status == RequestStatus.RUNNING else self.waiting)
            holder.remove(req)

        req.status = finished_status
        self._free_request(req)
167+
def _update_from_kv_xfer_finished(self,
                                  model_runner_output: ModelRunnerOutput):
    """Apply the KV-transfer results reported in a model runner output.

    The output carries two optional collections of request IDs:

    * ``finished_recving``: each ID is recorded in
      ``self.finished_recving_kv_req_ids``.
    * ``finished_sending``: the blocks of each still-known request are
      freed; IDs no longer present in ``self.requests`` are only logged.
    """
    # Receive side: remember which requests completed their KV transfer.
    received_ids = model_runner_output.finished_recving or ()
    for req_id in received_ids:
        logger.debug("Finished recving KV transfer for request %s", req_id)
        self.finished_recving_kv_req_ids.add(req_id)

    # Send side: release blocks, tolerating requests that are already gone.
    sent_ids = model_runner_output.finished_sending or ()
    for req_id in sent_ids:
        logger.debug("Finished sending KV transfer for request %s", req_id)
        request = self.requests.get(req_id)
        if request is None:
            logger.debug("cannot find the req_id it may have been aborted.%s",
                         req_id)
            continue
        self._free_blocks(request)
190+
191+ def run_engine_core (* args , dp_rank : int = 0 , local_dp_rank : int = 0 , ** kwargs ):
192+ """Launch EngineCore busy loop in background process."""
193+
194+ # Signal handler used for graceful termination.
195+ # SystemExit exception is only raised once to allow this and worker
196+ # processes to terminate without error
197+ shutdown_requested = False
198+
199+ # Ensure we can serialize transformer config after spawning
200+ maybe_register_config_serialize_by_value ()
201+
202+ def signal_handler (signum , frame ):
203+ nonlocal shutdown_requested
204+ if not shutdown_requested :
205+ shutdown_requested = True
206+ raise SystemExit ()
207+
208+ # Either SIGTERM or SIGINT will terminate the engine_core
209+ signal .signal (signal .SIGTERM , signal_handler )
210+ signal .signal (signal .SIGINT , signal_handler )
211+
212+ engine_core : Optional [EngineCoreProc ] = None
213+ try :
214+ parallel_config : ParallelConfig = kwargs ["vllm_config" ].parallel_config
215+ if parallel_config .data_parallel_size > 1 or dp_rank > 0 :
216+ # Set data parallel rank for this engine process.
217+ parallel_config .data_parallel_rank = dp_rank
218+ parallel_config .data_parallel_rank_local = local_dp_rank
219+ engine_core = DPEngineCoreProc (* args , ** kwargs )
220+ else :
221+ engine_core = EngineCoreProc (* args , ** kwargs )
111222
223+ engine_core .scheduler .finish_requests = types .MethodType (
224+ finish_requests , engine_core .scheduler )
225+ engine_core .scheduler ._update_from_kv_xfer_finished = types .MethodType (
226+ _update_from_kv_xfer_finished , engine_core .scheduler )
112227 engine_core .run_busy_loop ()
113228
114229 except SystemExit :
@@ -129,4 +244,6 @@ def signal_handler(signum, frame):
# Replace EngineCoreProc.run_engine_core at import time. Both branches
# install a launcher defined in this module; the environment flag only
# selects which one.
if vllm_ascend_envs.VLLM_ASCEND_EXTERNAL_DP_LB_ENABLED:
    # External DP load balancing enabled: use the dedicated launcher.
    EngineCoreProc.run_engine_core = run_engine_core_dplb  # type: ignore[attr-defined]
else:
    # Otherwise install this module's run_engine_core.
    EngineCoreProc.run_engine_core = run_engine_core  # type: ignore[attr-defined]
0 commit comments