Skip to content
This repository was archived by the owner on Sep 4, 2025. It is now read-only.

Commit f842a7a

Browse files
authored
[misc] remove engine_use_ray (vllm-project#8126)
1 parent a65cb16 commit f842a7a

File tree

8 files changed

+32
-197
lines changed

8 files changed

+32
-197
lines changed

tests/async_engine/test_api_server.py

Lines changed: 4 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
1-
import os
21
import subprocess
32
import sys
43
import time
@@ -26,8 +25,7 @@ def _query_server_long(prompt: str) -> dict:
2625

2726

2827
@pytest.fixture
29-
def api_server(tokenizer_pool_size: int, engine_use_ray: bool,
30-
worker_use_ray: bool):
28+
def api_server(tokenizer_pool_size: int, worker_use_ray: bool):
3129
script_path = Path(__file__).parent.joinpath(
3230
"api_server_async_engine.py").absolute()
3331
commands = [
@@ -37,25 +35,17 @@ def api_server(tokenizer_pool_size: int, engine_use_ray: bool,
3735
str(tokenizer_pool_size)
3836
]
3937

40-
# Copy the environment variables and append `VLLM_ALLOW_ENGINE_USE_RAY=1`
41-
# to prevent `--engine-use-ray` raises an exception due to it deprecation
42-
env_vars = os.environ.copy()
43-
env_vars["VLLM_ALLOW_ENGINE_USE_RAY"] = "1"
44-
45-
if engine_use_ray:
46-
commands.append("--engine-use-ray")
4738
if worker_use_ray:
4839
commands.append("--worker-use-ray")
49-
uvicorn_process = subprocess.Popen(commands, env=env_vars)
40+
uvicorn_process = subprocess.Popen(commands)
5041
yield
5142
uvicorn_process.terminate()
5243

5344

5445
@pytest.mark.parametrize("tokenizer_pool_size", [0, 2])
5546
@pytest.mark.parametrize("worker_use_ray", [False, True])
56-
@pytest.mark.parametrize("engine_use_ray", [False, True])
57-
def test_api_server(api_server, tokenizer_pool_size: int, worker_use_ray: bool,
58-
engine_use_ray: bool):
47+
def test_api_server(api_server, tokenizer_pool_size: int,
48+
worker_use_ray: bool):
5949
"""
6050
Run the API server and test it.
6151

tests/async_engine/test_async_llm_engine.py

Lines changed: 3 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
import asyncio
2-
import os
32
from asyncio import CancelledError
43
from dataclasses import dataclass
54
from typing import Optional
@@ -72,14 +71,12 @@ def has_unfinished_requests_for_virtual_engine(self, virtual_engine):
7271

7372

7473
class MockAsyncLLMEngine(AsyncLLMEngine):
75-
76-
def _init_engine(self, *args, **kwargs):
77-
return MockEngine()
74+
_engine_class = MockEngine
7875

7976

8077
@pytest.mark.asyncio
8178
async def test_new_requests_event():
82-
engine = MockAsyncLLMEngine(worker_use_ray=False, engine_use_ray=False)
79+
engine = MockAsyncLLMEngine(worker_use_ray=False)
8380
engine.start_background_loop()
8481
await asyncio.sleep(0.01)
8582
assert engine.engine.step_calls == 0
@@ -112,16 +109,11 @@ async def test_new_requests_event():
112109
assert engine.engine.add_request_calls == 3
113110
assert engine.engine.step_calls == old_step_calls + 1
114111

115-
# Allow deprecated engine_use_ray to not raise exception
116-
os.environ["VLLM_ALLOW_ENGINE_USE_RAY"] = "1"
117-
118-
engine = MockAsyncLLMEngine(worker_use_ray=True, engine_use_ray=True)
112+
engine = MockAsyncLLMEngine(worker_use_ray=True)
119113
assert engine.get_model_config() is not None
120114
assert engine.get_tokenizer() is not None
121115
assert engine.get_decoding_config() is not None
122116

123-
os.environ.pop("VLLM_ALLOW_ENGINE_USE_RAY")
124-
125117

126118
def start_engine():
127119
wait_for_gpu_memory_to_clear(

tests/async_engine/test_openapi_server_ray.py renamed to tests/async_engine/test_openapi_server.py

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -19,16 +19,11 @@ def server():
1919
"--max-model-len",
2020
"2048",
2121
"--enforce-eager",
22-
"--engine-use-ray",
2322
"--chat-template",
2423
str(chatml_jinja_path),
2524
]
2625

27-
# Allow `--engine-use-ray`, otherwise the launch of the server throw
28-
# an error due to try to use a deprecated feature
29-
env_dict = {"VLLM_ALLOW_ENGINE_USE_RAY": "1"}
30-
with RemoteOpenAIServer(MODEL_NAME, args,
31-
env_dict=env_dict) as remote_server:
26+
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
3227
yield remote_server
3328

3429

vllm/engine/arg_utils.py

Lines changed: 0 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1035,24 +1035,13 @@ def create_engine_config(self) -> EngineConfig:
10351035
@dataclass
10361036
class AsyncEngineArgs(EngineArgs):
10371037
"""Arguments for asynchronous vLLM engine."""
1038-
engine_use_ray: bool = False
10391038
disable_log_requests: bool = False
10401039

10411040
@staticmethod
10421041
def add_cli_args(parser: FlexibleArgumentParser,
10431042
async_args_only: bool = False) -> FlexibleArgumentParser:
10441043
if not async_args_only:
10451044
parser = EngineArgs.add_cli_args(parser)
1046-
parser.add_argument('--engine-use-ray',
1047-
action='store_true',
1048-
help='Use Ray to start the LLM engine in a '
1049-
'separate process as the server process.'
1050-
'(DEPRECATED. This argument is deprecated '
1051-
'and will be removed in a future update. '
1052-
'Set `VLLM_ALLOW_ENGINE_USE_RAY=1` to force '
1053-
'use it. See '
1054-
'https://github.com/vllm-project/vllm/issues/7045.'
1055-
')')
10561045
parser.add_argument('--disable-log-requests',
10571046
action='store_true',
10581047
help='Disable logging requests.')

0 commit comments

Comments (0)