Skip to content

Commit ad6c655

Browse files
Preload heavy modules when the multiprocessing start method is forkserver (#22214)
Signed-off-by: Lionel Villard <[email protected]>
1 parent 14bcf93 commit ad6c655

File tree

2 files changed: 13 additions (+13) and 1 deletion (-1)

2 files changed

+13
-1
lines changed

vllm/benchmarks/latency.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
from tqdm import tqdm
1414

1515
import vllm.envs as envs
16-
from vllm import LLM, SamplingParams
1716
from vllm.benchmarks.lib.utils import (convert_to_pytorch_benchmark_format,
1817
write_to_json)
1918
from vllm.engine.arg_utils import EngineArgs
@@ -85,6 +84,9 @@ def main(args: argparse.Namespace):
8584
"Please set it to a valid path to use torch profiler.")
8685
engine_args = EngineArgs.from_cli_args(args)
8786

87+
# Lazy import to avoid importing LLM when the bench command is not selected.
88+
from vllm import LLM, SamplingParams
89+
8890
# NOTE(woosuk): If the request cannot be processed in a single batch,
8991
# the engine will automatically process the request in multiple batches.
9092
llm = LLM(**dataclasses.asdict(engine_args))

vllm/entrypoints/openai/api_server.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import inspect
99
import json
1010
import multiprocessing
11+
import multiprocessing.forkserver as forkserver
1112
import os
1213
import signal
1314
import socket
@@ -155,6 +156,15 @@ async def build_async_engine_client(
155156
client_config: Optional[dict[str, Any]] = None,
156157
) -> AsyncIterator[EngineClient]:
157158

159+
if os.getenv("VLLM_WORKER_MULTIPROC_METHOD") == "forkserver":
160+
# The executor is expected to be the multiprocessing ("mp") executor.
161+
# Pre-import heavy modules in the forkserver process
162+
logger.debug("Setup forkserver with pre-imports")
163+
multiprocessing.set_start_method('forkserver')
164+
multiprocessing.set_forkserver_preload(["vllm.v1.engine.async_llm"])
165+
forkserver.ensure_running()
166+
logger.debug("Forkserver setup complete!")
167+
158168
# Context manager to handle engine_client lifecycle
159169
# Ensures everything is shutdown and cleaned up on error/exit
160170
engine_args = AsyncEngineArgs.from_cli_args(args)

0 commit comments

Comments
 (0)