Skip to content

Commit 5926225

Browse files
authored
rm patch run_engine_core in dense case (#2665)
### What this PR does / why we need it? Patching run_engine_core in qwen introduces performance degradation in qwen, so we only enable the patch in the EXTERNAL_DP_LB_ENABLED case. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By CI. --------- Signed-off-by: zouyida2052 <[email protected]>
1 parent 551b87e commit 5926225

File tree

1 file changed

+1
-59
lines changed

1 file changed

+1
-59
lines changed

vllm_ascend/patch/platform/patch_0_9_1/patch_core.py

Lines changed: 1 addition & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -81,10 +81,7 @@ def run_busy_loop(self):
8181
self.execute_dummy_batch()
8282

8383

84-
def run_engine_core_dplb(*args,
85-
dp_rank: int = 0,
86-
local_dp_rank: int = 0,
87-
**kwargs):
84+
def run_engine_core(*args, dp_rank: int = 0, local_dp_rank: int = 0, **kwargs):
8885
"""Launch EngineCore busy loop in background process."""
8986

9087
# Signal handler used for graceful termination.
@@ -188,62 +185,7 @@ def _update_from_kv_xfer_finished(self,
188185
req_id)
189186

190187

191-
def run_engine_core(*args, dp_rank: int = 0, local_dp_rank: int = 0, **kwargs):
192-
"""Launch EngineCore busy loop in background process."""
193-
194-
# Signal handler used for graceful termination.
195-
# SystemExit exception is only raised once to allow this and worker
196-
# processes to terminate without error
197-
shutdown_requested = False
198-
199-
# Ensure we can serialize transformer config after spawning
200-
maybe_register_config_serialize_by_value()
201-
202-
def signal_handler(signum, frame):
203-
nonlocal shutdown_requested
204-
if not shutdown_requested:
205-
shutdown_requested = True
206-
raise SystemExit()
207-
208-
# Either SIGTERM or SIGINT will terminate the engine_core
209-
signal.signal(signal.SIGTERM, signal_handler)
210-
signal.signal(signal.SIGINT, signal_handler)
211-
212-
engine_core: Optional[EngineCoreProc] = None
213-
try:
214-
parallel_config: ParallelConfig = kwargs["vllm_config"].parallel_config
215-
if parallel_config.data_parallel_size > 1 or dp_rank > 0:
216-
# Set data parallel rank for this engine process.
217-
parallel_config.data_parallel_rank = dp_rank
218-
parallel_config.data_parallel_rank_local = local_dp_rank
219-
engine_core = DPEngineCoreProc(*args, **kwargs)
220-
else:
221-
engine_core = EngineCoreProc(*args, **kwargs)
222-
223-
engine_core.scheduler.finish_requests = types.MethodType(
224-
finish_requests, engine_core.scheduler)
225-
engine_core.scheduler._update_from_kv_xfer_finished = types.MethodType(
226-
_update_from_kv_xfer_finished, engine_core.scheduler)
227-
engine_core.run_busy_loop()
228-
229-
except SystemExit:
230-
logger.debug("EngineCore exiting.")
231-
raise
232-
except Exception as e:
233-
if engine_core is None:
234-
logger.exception("EngineCore failed to start.")
235-
else:
236-
logger.exception("EngineCore encountered a fatal error.")
237-
engine_core._send_engine_dead()
238-
raise e
239-
finally:
240-
if engine_core is not None:
241-
engine_core.shutdown()
242-
243-
244188
# Apply this patch only if the external data parallelism is enabled
245189
if vllm_ascend_envs.VLLM_ASCEND_EXTERNAL_DP_LB_ENABLED:
246190
# Patch the EngineCoreClient to use the custom make_async_mp_client
247-
EngineCoreProc.run_engine_core = run_engine_core_dplb # type: ignore[attr-defined]
248-
else:
249191
EngineCoreProc.run_engine_core = run_engine_core # type: ignore[attr-defined]

0 commit comments

Comments
 (0)