@@ -245,17 +245,13 @@ def get_diff_sampling_param(self):
         return self.diff_sampling_param or {}
 
 
-def _build_serving_chat(
-    engine: AsyncLLM, model_config: MockModelConfig
-) -> OpenAIServingChat:
+def _build_serving_chat(engine: AsyncLLM) -> OpenAIServingChat:
     models = OpenAIServingModels(
         engine_client=engine,
         base_model_paths=BASE_MODEL_PATHS,
-        model_config=model_config,
     )
     serving_chat = OpenAIServingChat(
         engine,
-        model_config,
         models,
         response_role="assistant",
         chat_template=CHAT_TEMPLATE,
@@ -280,18 +276,17 @@ async def _fake_process_inputs(
 
 @dataclass
 class MockEngine:
-    async def get_model_config(self):
-        return MockModelConfig()
+    model_config: MockModelConfig = field(default_factory=MockModelConfig)
+    processor: MagicMock = field(default_factory=MagicMock)
+    io_processor: MagicMock = field(default_factory=MagicMock)
 
 
 async def _async_serving_chat_init():
     engine = MockEngine()
-    model_config = await engine.get_model_config()
 
-    models = OpenAIServingModels(engine, model_config, BASE_MODEL_PATHS)
+    models = OpenAIServingModels(engine, BASE_MODEL_PATHS)
     serving_completion = OpenAIServingChat(
         engine,
-        model_config,
         models,
         response_role="assistant",
         chat_template=CHAT_TEMPLATE,
@@ -311,8 +306,11 @@ async def test_serving_chat_returns_correct_model_name():
     mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
+    mock_engine.model_config = MockModelConfig()
+    mock_engine.processor = MagicMock()
+    mock_engine.io_processor = MagicMock()
 
-    serving_chat = _build_serving_chat(mock_engine, MockModelConfig())
+    serving_chat = _build_serving_chat(mock_engine)
     messages = [{"role": "user", "content": "what is 1+1?"}]
 
     async def return_model_name(*args):
@@ -338,8 +336,11 @@ async def test_serving_chat_should_set_correct_max_tokens():
     mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
    mock_engine.errored = False
+    mock_engine.model_config = MockModelConfig()
+    mock_engine.processor = MagicMock()
+    mock_engine.io_processor = MagicMock()
 
-    serving_chat = _build_serving_chat(mock_engine, MockModelConfig())
+    serving_chat = _build_serving_chat(mock_engine)
 
     req = ChatCompletionRequest(
         model=MODEL_NAME,
@@ -368,9 +369,12 @@ async def test_serving_chat_should_set_correct_max_tokens():
     mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
+    mock_engine.model_config = mock_model_config
+    mock_engine.processor = MagicMock()
+    mock_engine.io_processor = MagicMock()
 
     # Initialize the serving chat
-    serving_chat = _build_serving_chat(mock_engine, mock_model_config)
+    serving_chat = _build_serving_chat(mock_engine)
 
     # Test Case 1: No max_tokens specified in request
     req = ChatCompletionRequest(
@@ -410,9 +414,12 @@ async def test_serving_chat_should_set_correct_max_tokens():
     mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
+    mock_engine.model_config = mock_model_config
+    mock_engine.processor = MagicMock()
+    mock_engine.io_processor = MagicMock()
 
     # Initialize the serving chat
-    serving_chat = _build_serving_chat(mock_engine, mock_model_config)
+    serving_chat = _build_serving_chat(mock_engine)
 
     # Test case 1: No max_tokens specified, defaults to context_window
     req = ChatCompletionRequest(
@@ -453,9 +460,12 @@ async def test_serving_chat_could_load_correct_generation_config():
     mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
+    mock_engine.model_config = mock_model_config
+    mock_engine.processor = MagicMock()
+    mock_engine.io_processor = MagicMock()
 
     # Initialize the serving chat
-    serving_chat = _build_serving_chat(mock_engine, mock_model_config)
+    serving_chat = _build_serving_chat(mock_engine)
 
     req = ChatCompletionRequest(
         model=MODEL_NAME,
@@ -496,8 +506,11 @@ async def test_serving_chat_did_set_correct_cache_salt(model_type):
     mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
+    mock_engine.model_config = mock_model_config
+    mock_engine.processor = MagicMock()
+    mock_engine.io_processor = MagicMock()
 
-    serving_chat = _build_serving_chat(mock_engine, mock_model_config)
+    serving_chat = _build_serving_chat(mock_engine)
 
     # Test cache_salt
     req = ChatCompletionRequest(
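
Every hunk above follows the same pattern: the serving layer now reads model_config (and the processor handles) directly off the engine instead of receiving them as separate constructor arguments, so the tests only need to attach those attributes to the mocked AsyncLLM. The following is a minimal, self-contained sketch of that pattern under stated assumptions; FakeModelConfig, FakeEngine, and FakeServingChat are hypothetical stand-ins, not the vLLM classes touched by this diff.

# Sketch of the "config lives on the engine" pattern used by the new tests.
from dataclasses import dataclass, field
from unittest.mock import MagicMock


@dataclass
class FakeModelConfig:  # hypothetical stand-in for MockModelConfig
    max_model_len: int = 100


@dataclass
class FakeEngine:  # hypothetical stand-in for the mocked AsyncLLM
    model_config: FakeModelConfig = field(default_factory=FakeModelConfig)
    processor: MagicMock = field(default_factory=MagicMock)
    io_processor: MagicMock = field(default_factory=MagicMock)


class FakeServingChat:  # hypothetical stand-in for OpenAIServingChat
    def __init__(self, engine):
        # The config is pulled from the engine; no separate model_config argument.
        self.model_config = engine.model_config


def _build_serving_chat(engine) -> FakeServingChat:
    # Helper mirrors the simplified signature in the diff: engine only.
    return FakeServingChat(engine)


serving_chat = _build_serving_chat(FakeEngine())
assert serving_chat.model_config.max_model_len == 100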