From 41abfd0ca23f10cb32bf6f2b50e1fcb428705a28 Mon Sep 17 00:00:00 2001
From: DarkLight1337
Date: Tue, 13 May 2025 09:52:23 +0000
Subject: [PATCH] [Bugfix] Fix entrypoints metrics tests

Signed-off-by: DarkLight1337
---
 vllm/entrypoints/openai/api_server.py | 8 ++++----
 vllm/v1/engine/core.py                | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index e809579c2b1..a954a9ff90b 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -150,10 +150,6 @@ async def build_async_engine_client(
 
     async with build_async_engine_client_from_engine_args(
             engine_args, args.disable_frontend_multiprocessing) as engine:
-
-        # Don't keep the dummy data in memory
-        await engine.reset_mm_cache()
-
         yield engine
 
 
@@ -189,6 +185,10 @@ async def build_async_engine_client_from_engine_args(
             usage_context=usage_context,
             disable_log_requests=engine_args.disable_log_requests,
             disable_log_stats=engine_args.disable_log_stats)
+
+        # Don't keep the dummy data in memory
+        await async_llm.reset_mm_cache()
+
         yield async_llm
     finally:
         if async_llm:
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
index 5a493db8a5f..bc410befbda 100644
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -289,7 +289,7 @@ def profile(self, is_start: bool = True):
     def reset_mm_cache(self):
         # NOTE: Since this is mainly for debugging, we don't attempt to
         # re-sync the internal caches (P0 processor, P0 mirror, P1 mirror)
-        if self.scheduler.get_num_unfinished_requests():
+        if self.scheduler.has_unfinished_requests():
             logger.warning("Resetting the multi-modal cache when requests are "
                            "in progress may lead to desynced internal caches.")