@@ -81,10 +81,7 @@ def run_busy_loop(self):
81
81
self .execute_dummy_batch ()
82
82
83
83
84
- def run_engine_core_dplb (* args ,
85
- dp_rank : int = 0 ,
86
- local_dp_rank : int = 0 ,
87
- ** kwargs ):
84
+ def run_engine_core (* args , dp_rank : int = 0 , local_dp_rank : int = 0 , ** kwargs ):
88
85
"""Launch EngineCore busy loop in background process."""
89
86
90
87
# Signal handler used for graceful termination.
@@ -188,62 +185,7 @@ def _update_from_kv_xfer_finished(self,
188
185
req_id )
189
186
190
187
191
- def run_engine_core (* args , dp_rank : int = 0 , local_dp_rank : int = 0 , ** kwargs ):
192
- """Launch EngineCore busy loop in background process."""
193
-
194
- # Signal handler used for graceful termination.
195
- # SystemExit exception is only raised once to allow this and worker
196
- # processes to terminate without error
197
- shutdown_requested = False
198
-
199
- # Ensure we can serialize transformer config after spawning
200
- maybe_register_config_serialize_by_value ()
201
-
202
- def signal_handler (signum , frame ):
203
- nonlocal shutdown_requested
204
- if not shutdown_requested :
205
- shutdown_requested = True
206
- raise SystemExit ()
207
-
208
- # Either SIGTERM or SIGINT will terminate the engine_core
209
- signal .signal (signal .SIGTERM , signal_handler )
210
- signal .signal (signal .SIGINT , signal_handler )
211
-
212
- engine_core : Optional [EngineCoreProc ] = None
213
- try :
214
- parallel_config : ParallelConfig = kwargs ["vllm_config" ].parallel_config
215
- if parallel_config .data_parallel_size > 1 or dp_rank > 0 :
216
- # Set data parallel rank for this engine process.
217
- parallel_config .data_parallel_rank = dp_rank
218
- parallel_config .data_parallel_rank_local = local_dp_rank
219
- engine_core = DPEngineCoreProc (* args , ** kwargs )
220
- else :
221
- engine_core = EngineCoreProc (* args , ** kwargs )
222
-
223
- engine_core .scheduler .finish_requests = types .MethodType (
224
- finish_requests , engine_core .scheduler )
225
- engine_core .scheduler ._update_from_kv_xfer_finished = types .MethodType (
226
- _update_from_kv_xfer_finished , engine_core .scheduler )
227
- engine_core .run_busy_loop ()
228
-
229
- except SystemExit :
230
- logger .debug ("EngineCore exiting." )
231
- raise
232
- except Exception as e :
233
- if engine_core is None :
234
- logger .exception ("EngineCore failed to start." )
235
- else :
236
- logger .exception ("EngineCore encountered a fatal error." )
237
- engine_core ._send_engine_dead ()
238
- raise e
239
- finally :
240
- if engine_core is not None :
241
- engine_core .shutdown ()
242
-
243
-
244
188
# Apply this patch only if the external data parallelism is enabled
245
189
if vllm_ascend_envs .VLLM_ASCEND_EXTERNAL_DP_LB_ENABLED :
246
190
# Patch the EngineCoreClient to use the custom make_async_mp_client
247
- EngineCoreProc .run_engine_core = run_engine_core_dplb # type: ignore[attr-defined]
248
- else :
249
191
EngineCoreProc .run_engine_core = run_engine_core # type: ignore[attr-defined]
0 commit comments