
Commit 4bdf7ac

[Bugfix] Fix SHM cache initialization (vllm-project#26427)
Signed-off-by: DarkLight1337 <[email protected]>
1 parent dc7976d commit 4bdf7ac

30 files changed, +357 -417 lines

tests/entrypoints/openai/test_lora_resolvers.py

Lines changed: 5 additions & 3 deletions
@@ -113,15 +113,17 @@ async def mock_generate(*args, **kwargs):
     mock_engine.generate.reset_mock()
     mock_engine.add_lora.reset_mock()

-    mock_model_config = MockModelConfig()
+    mock_engine.model_config = MockModelConfig()
+    mock_engine.processor = MagicMock()
+    mock_engine.io_processor = MagicMock()
+
     models = OpenAIServingModels(
         engine_client=mock_engine,
         base_model_paths=BASE_MODEL_PATHS,
-        model_config=mock_model_config,
     )

     serving_completion = OpenAIServingCompletion(
-        mock_engine, mock_model_config, models, request_logger=None
+        mock_engine, models, request_logger=None
     )

     serving_completion._process_inputs = AsyncMock(
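
The same refactor recurs in the remaining test files: instead of passing a model_config argument into the serving-layer constructors, the tests attach model_config, processor, and io_processor to the (mock) engine client and let the serving objects read them from there. A minimal sketch of the resulting pattern, assuming the MockModelConfig, BASE_MODEL_PATHS, and MagicMock-based engine fixtures used in this test module:

# Sketch only; MockModelConfig and BASE_MODEL_PATHS are fixtures from this test module.
mock_engine = MagicMock(spec=AsyncLLM)
mock_engine.model_config = MockModelConfig()  # config now lives on the engine client
mock_engine.processor = MagicMock()
mock_engine.io_processor = MagicMock()

models = OpenAIServingModels(
    engine_client=mock_engine,
    base_model_paths=BASE_MODEL_PATHS,  # no model_config keyword anymore
)
serving_completion = OpenAIServingCompletion(
    mock_engine, models, request_logger=None  # model_config positional arg dropped
)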

tests/entrypoints/openai/test_serving_chat.py

Lines changed: 29 additions & 16 deletions
@@ -245,17 +245,13 @@ def get_diff_sampling_param(self):
         return self.diff_sampling_param or {}


-def _build_serving_chat(
-    engine: AsyncLLM, model_config: MockModelConfig
-) -> OpenAIServingChat:
+def _build_serving_chat(engine: AsyncLLM) -> OpenAIServingChat:
     models = OpenAIServingModels(
         engine_client=engine,
         base_model_paths=BASE_MODEL_PATHS,
-        model_config=model_config,
     )
     serving_chat = OpenAIServingChat(
         engine,
-        model_config,
         models,
         response_role="assistant",
         chat_template=CHAT_TEMPLATE,
@@ -280,18 +276,17 @@ async def _fake_process_inputs(

 @dataclass
 class MockEngine:
-    async def get_model_config(self):
-        return MockModelConfig()
+    model_config: MockModelConfig = field(default_factory=MockModelConfig)
+    processor: MagicMock = field(default_factory=MagicMock)
+    io_processor: MagicMock = field(default_factory=MagicMock)


 async def _async_serving_chat_init():
     engine = MockEngine()
-    model_config = await engine.get_model_config()

-    models = OpenAIServingModels(engine, model_config, BASE_MODEL_PATHS)
+    models = OpenAIServingModels(engine, BASE_MODEL_PATHS)
     serving_completion = OpenAIServingChat(
         engine,
-        model_config,
         models,
         response_role="assistant",
         chat_template=CHAT_TEMPLATE,
@@ -311,8 +306,11 @@ async def test_serving_chat_returns_correct_model_name():
     mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
+    mock_engine.model_config = MockModelConfig()
+    mock_engine.processor = MagicMock()
+    mock_engine.io_processor = MagicMock()

-    serving_chat = _build_serving_chat(mock_engine, MockModelConfig())
+    serving_chat = _build_serving_chat(mock_engine)
     messages = [{"role": "user", "content": "what is 1+1?"}]

     async def return_model_name(*args):
@@ -338,8 +336,11 @@ async def test_serving_chat_should_set_correct_max_tokens():
     mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
+    mock_engine.model_config = MockModelConfig()
+    mock_engine.processor = MagicMock()
+    mock_engine.io_processor = MagicMock()

-    serving_chat = _build_serving_chat(mock_engine, MockModelConfig())
+    serving_chat = _build_serving_chat(mock_engine)

     req = ChatCompletionRequest(
         model=MODEL_NAME,
@@ -368,9 +369,12 @@ async def test_serving_chat_should_set_correct_max_tokens():
     mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
+    mock_engine.model_config = mock_model_config
+    mock_engine.processor = MagicMock()
+    mock_engine.io_processor = MagicMock()

     # Initialize the serving chat
-    serving_chat = _build_serving_chat(mock_engine, mock_model_config)
+    serving_chat = _build_serving_chat(mock_engine)

     # Test Case 1: No max_tokens specified in request
     req = ChatCompletionRequest(
@@ -410,9 +414,12 @@ async def test_serving_chat_should_set_correct_max_tokens():
     mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
+    mock_engine.model_config = mock_model_config
+    mock_engine.processor = MagicMock()
+    mock_engine.io_processor = MagicMock()

     # Initialize the serving chat
-    serving_chat = _build_serving_chat(mock_engine, mock_model_config)
+    serving_chat = _build_serving_chat(mock_engine)

     # Test case 1: No max_tokens specified, defaults to context_window
     req = ChatCompletionRequest(
@@ -453,9 +460,12 @@ async def test_serving_chat_could_load_correct_generation_config():
     mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
+    mock_engine.model_config = mock_model_config
+    mock_engine.processor = MagicMock()
+    mock_engine.io_processor = MagicMock()

     # Initialize the serving chat
-    serving_chat = _build_serving_chat(mock_engine, mock_model_config)
+    serving_chat = _build_serving_chat(mock_engine)

     req = ChatCompletionRequest(
         model=MODEL_NAME,
@@ -496,8 +506,11 @@ async def test_serving_chat_did_set_correct_cache_salt(model_type):
     mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
+    mock_engine.model_config = mock_model_config
+    mock_engine.processor = MagicMock()
+    mock_engine.io_processor = MagicMock()

-    serving_chat = _build_serving_chat(mock_engine, mock_model_config)
+    serving_chat = _build_serving_chat(mock_engine)

     # Test cache_salt
     req = ChatCompletionRequest(

tests/entrypoints/openai/test_serving_engine.py

Lines changed: 3 additions & 1 deletion
@@ -22,10 +22,12 @@ def serving() -> OpenAIServing:
     model_config = Mock(spec=ModelConfig)
     model_config.max_model_len = 32768
     models = Mock(spec=OpenAIServingModels)
+    models.model_config = model_config
+    models.processor = Mock()
+    models.io_processor = Mock()

     serving = OpenAIServing(
         engine_client=engine_client,
-        model_config=model_config,
         models=models,
         request_logger=None,
     )

tests/entrypoints/openai/test_serving_models.py

Lines changed: 4 additions & 2 deletions
@@ -25,15 +25,17 @@


 async def _async_serving_models_init() -> OpenAIServingModels:
-    mock_model_config = MagicMock(spec=ModelConfig)
     mock_engine_client = MagicMock(spec=EngineClient)
     # Set the max_model_len attribute to avoid missing attribute
+    mock_model_config = MagicMock(spec=ModelConfig)
     mock_model_config.max_model_len = 2048
+    mock_engine_client.model_config = mock_model_config
+    mock_engine_client.processor = MagicMock()
+    mock_engine_client.io_processor = MagicMock()

     serving_models = OpenAIServingModels(
         engine_client=mock_engine_client,
         base_model_paths=BASE_MODEL_PATHS,
-        model_config=mock_model_config,
         lora_modules=None,
     )
     await serving_models.init_static_loras()

tests/entrypoints/openai/test_serving_responses.py

Lines changed: 9 additions & 5 deletions
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

 from contextlib import AsyncExitStack
-from unittest.mock import AsyncMock, MagicMock
+from unittest.mock import MagicMock

 import pytest
 import pytest_asyncio
@@ -70,11 +70,14 @@ async def serving_responses_instance(self):
         """Create a real OpenAIServingResponses instance for testing"""
         # Create minimal mocks for required dependencies
         engine_client = MagicMock()
-        engine_client.get_model_config = AsyncMock()

         model_config = MagicMock()
         model_config.hf_config.model_type = "test"
         model_config.get_diff_sampling_param.return_value = {}
+        engine_client.model_config = model_config
+
+        engine_client.processor = MagicMock()
+        engine_client.io_processor = MagicMock()

         models = MagicMock()

@@ -83,7 +86,6 @@ async def serving_responses_instance(self):
         # Create the actual instance
         instance = OpenAIServingResponses(
             engine_client=engine_client,
-            model_config=model_config,
             models=models,
             request_logger=None,
             chat_template=None,
@@ -132,18 +134,20 @@ async def serving_responses_instance(self):
         """Create a real OpenAIServingResponses instance for testing"""
         # Create minimal mocks for required dependencies
         engine_client = MagicMock()
-        engine_client.get_model_config = AsyncMock()

         model_config = MagicMock()
         model_config.hf_config.model_type = "test"
         model_config.get_diff_sampling_param.return_value = {}
+        engine_client.model_config = model_config
+
+        engine_client.processor = MagicMock()
+        engine_client.io_processor = MagicMock()

         models = MagicMock()

         # Create the actual instance
         instance = OpenAIServingResponses(
             engine_client=engine_client,
-            model_config=model_config,
             models=models,
             request_logger=None,
             chat_template=None,

tests/test_inputs.py

Lines changed: 5 additions & 3 deletions
@@ -7,6 +7,7 @@
 from vllm.inputs import zip_enc_dec_prompts
 from vllm.inputs.parse import parse_raw_prompts
 from vllm.inputs.preprocess import InputPreprocessor
+from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs

 pytestmark = pytest.mark.cpu_test

@@ -106,7 +107,8 @@ def test_zip_enc_dec_prompts(mm_processor_kwargs, expected_mm_kwargs):
 )
 def test_preprocessor_text_no_mm_inputs(model_id, prompt):
     model_config = ModelConfig(model=model_id)
-    input_preprocessor = InputPreprocessor(model_config)
+    tokenizer = init_tokenizer_from_configs(model_config)
+    input_preprocessor = InputPreprocessor(model_config, tokenizer)

     with pytest.raises(ValueError, match="does not support multimodal inputs"):
         input_preprocessor.preprocess(prompt)
@@ -127,8 +129,8 @@ def test_preprocessor_text_no_mm_inputs(model_id, prompt):
 )
 def test_preprocessor_always_mm_code_path(model_id, prompt):
     model_config = ModelConfig(model=model_id)
-    input_preprocessor = InputPreprocessor(model_config)
-    tokenizer = input_preprocessor.tokenizer
+    tokenizer = init_tokenizer_from_configs(model_config)
+    input_preprocessor = InputPreprocessor(model_config, tokenizer)

     # HF processor adds sep token
     sep_token_id = tokenizer.vocab[tokenizer.sep_token]
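
For reference, the updated InputPreprocessor usage above boils down to building the tokenizer explicitly and passing it in. A minimal sketch, with the model id as a placeholder and the ModelConfig import path assumed to be vllm.config:

from vllm.config import ModelConfig  # assumed import path
from vllm.inputs.preprocess import InputPreprocessor
from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs

model_config = ModelConfig(model="some-model-id")      # placeholder model id
tokenizer = init_tokenizer_from_configs(model_config)  # tokenizer is built up front
input_preprocessor = InputPreprocessor(model_config, tokenizer)  # and passed explicitly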

tests/v1/engine/test_processor_multi_modal_uuids.py

Lines changed: 1 addition & 1 deletion
@@ -65,7 +65,7 @@ def __init__(self, gb: float):
         device_config=DeviceConfig(device="cpu"),
     )

-    return Processor(vllm_config)
+    return Processor(vllm_config, tokenizer=None)


 def test_multi_modal_uuids_length_mismatch_raises(monkeypatch):

tests/v1/sample/test_logprobs.py

Lines changed: 1 addition & 1 deletion
@@ -459,7 +459,7 @@ def test_all_logprobs(example_prompts):
     results_logprobs_all = runner.llm.generate(
         example_prompts, sampling_params=sampling_params_logprobs_all
     )
-    vocab_size = runner.llm.llm_engine.get_model_config().get_vocab_size()
+    vocab_size = runner.llm.llm_engine.model_config.get_vocab_size()

     for i in range(len(results_logprobs_all)):
         logprobs = results_logprobs_all[i].outputs[0].logprobs

vllm/benchmarks/throughput.py

Lines changed: 1 addition & 1 deletion
@@ -186,7 +186,7 @@ async def run_vllm_async(
         engine_args,
         disable_frontend_multiprocessing=disable_frontend_multiprocessing,
     ) as llm:
-        model_config = await llm.get_model_config()
+        model_config = llm.model_config
         assert all(
             model_config.max_model_len
             >= (request.prompt_len + request.expected_output_len)
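
The benchmark change mirrors the rest of the commit: the model config is read as an attribute of the engine client rather than fetched through the removed async getter. A minimal sketch of the access pattern inside the async context:

# before: model_config = await llm.get_model_config()
# after:  the config is a plain attribute on the engine client
model_config = llm.model_config
max_model_len = model_config.max_model_len  # used in the length check shown above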
