@@ -236,7 +236,7 @@ def test_engine_core_concurrent_batches(monkeypatch: pytest.MonkeyPatch):
236
236
Test that the engine can handle multiple concurrent batches.
237
237
"""
238
238
239
- def make_request_with_max_tokens (req_id : int ,
239
+ def make_request_with_max_tokens (req_id : str ,
240
240
max_tokens : int ) -> EngineCoreRequest :
241
241
request = make_request ()
242
242
request .request_id = req_id
@@ -297,16 +297,16 @@ def shutdown(self):
297
297
assert engine_core .batch_queue is not None
298
298
299
299
# Add two requests in a row. Each request has 12 prompt tokens.
300
- req0 = make_request_with_max_tokens (0 , 5 )
300
+ req0 = make_request_with_max_tokens ("0" , 5 )
301
301
engine_core .add_request (req0 )
302
- req1 = make_request_with_max_tokens (1 , 5 )
302
+ req1 = make_request_with_max_tokens ("1" , 5 )
303
303
engine_core .add_request (req1 )
304
304
305
305
# Schedule Batch 1: (10, req0)
306
306
assert engine_core .step_with_batch_queue ()[0 ] is None
307
307
assert engine_core .batch_queue .qsize () == 1
308
308
scheduler_output = engine_core .batch_queue .queue [- 1 ][1 ]
309
- assert scheduler_output .num_scheduled_tokens [0 ] == 10
309
+ assert scheduler_output .num_scheduled_tokens ["0" ] == 10
310
310
# num_computed_tokens should have been updated immediately.
311
311
assert engine_core .scheduler .requests [
312
312
req0 .request_id ].num_computed_tokens == 10
@@ -315,11 +315,11 @@ def shutdown(self):
315
315
assert engine_core .step_with_batch_queue ()[0 ] is None
316
316
assert engine_core .batch_queue .qsize () == 2
317
317
scheduler_output = engine_core .batch_queue .queue [- 1 ][1 ]
318
- assert scheduler_output .num_scheduled_tokens [0 ] == 2
319
- assert scheduler_output .num_scheduled_tokens [1 ] == 8
318
+ assert scheduler_output .num_scheduled_tokens ["0" ] == 2
319
+ assert scheduler_output .num_scheduled_tokens ["1" ] == 8
320
320
# num_computed_tokens should have been updated immediately.
321
- assert engine_core .scheduler .requests [0 ].num_computed_tokens == 12
322
- assert engine_core .scheduler .requests [1 ].num_computed_tokens == 8
321
+ assert engine_core .scheduler .requests ["0" ].num_computed_tokens == 12
322
+ assert engine_core .scheduler .requests ["1" ].num_computed_tokens == 8
323
323
324
324
assert engine_core .scheduler .get_num_unfinished_requests () == 2
325
325
@@ -331,7 +331,7 @@ def shutdown(self):
331
331
engine_core .step_with_batch_queue ()
332
332
assert engine_core .batch_queue .qsize () == 2
333
333
scheduler_output = engine_core .batch_queue .queue [- 1 ][1 ]
334
- assert scheduler_output .num_scheduled_tokens [1 ] == 4
334
+ assert scheduler_output .num_scheduled_tokens ["1" ] == 4
335
335
336
336
# Batch queue is full. Finish Batch 2. Get first token of req0.
337
337
output = engine_core .step_with_batch_queue ()[0 ].get (0 )
@@ -343,7 +343,7 @@ def shutdown(self):
343
343
engine_core .step_with_batch_queue ()
344
344
assert engine_core .batch_queue .qsize () == 2
345
345
scheduler_output = engine_core .batch_queue .queue [- 1 ][1 ]
346
- assert scheduler_output .num_scheduled_tokens [0 ] == 1
346
+ assert scheduler_output .num_scheduled_tokens ["0" ] == 1
347
347
348
348
# Batch queue is full. Finish Batch 3. Get first token of req1.
349
349
output = engine_core .step_with_batch_queue ()[0 ].get (0 )
@@ -355,14 +355,14 @@ def shutdown(self):
355
355
engine_core .step_with_batch_queue ()
356
356
assert engine_core .batch_queue .qsize () == 2
357
357
scheduler_output = engine_core .batch_queue .queue [- 1 ][1 ]
358
- assert scheduler_output .num_scheduled_tokens [1 ] == 1
358
+ assert scheduler_output .num_scheduled_tokens ["1" ] == 1
359
359
360
360
# Loop until req0 is finished.
361
361
step = 0
362
362
req_id = 0
363
363
expected_num_tokens = [
364
- engine_core .scheduler .requests [0 ].num_tokens + 1 ,
365
- engine_core .scheduler .requests [1 ].num_tokens + 1 ,
364
+ engine_core .scheduler .requests ["0" ].num_tokens + 1 ,
365
+ engine_core .scheduler .requests ["1" ].num_tokens + 1 ,
366
366
]
367
367
while engine_core .scheduler .get_num_unfinished_requests () == 2 :
368
368
output = engine_core .step_with_batch_queue ()[0 ]
@@ -413,3 +413,49 @@ def get_worker_cache_config_field(worker, key: str):
413
413
get_worker_cache_config_field , args = ("num_cpu_blocks" , ))
414
414
assert all (x is not None for x in num_gpu_blocks )
415
415
assert all (x is not None for x in num_cpu_blocks )
416
+
417
+
418
@create_new_process_for_each_test()
def test_engine_core_invalid_request_id_type(monkeypatch: pytest.MonkeyPatch):
    """Check that add_request rejects request IDs that are not strings.

    A UUID object, an int and None are each assigned to ``request_id`` and
    submitted; every submission must raise a TypeError whose message names
    the offending type. A well-formed request is then added to confirm the
    engine still queues work after the rejections.
    """
    with monkeypatch.context() as m:
        m.setenv("VLLM_USE_V1", "1")

        vllm_config = EngineArgs(model=MODEL_NAME).create_engine_config()
        executor_class = Executor.get_class(vllm_config)

        with set_default_torch_num_threads(1):
            engine_core = EngineCore(vllm_config=vllm_config,
                                     executor_class=executor_class,
                                     log_stats=True)

        def expect_type_error(request, pattern):
            # Submitting the request must fail with a TypeError whose
            # message matches the given regex.
            with pytest.raises(TypeError, match=pattern):
                engine_core.add_request(request)

        # A raw UUID object (rather than str(uuid)) is a common mistake.
        bad_request = make_request()
        bad_request.request_id = uuid.uuid4()
        expect_type_error(bad_request,
                          "request_id must be a string, got.*UUID")

        # Integer ID.
        bad_request = make_request()
        bad_request.request_id = 12345
        expect_type_error(bad_request,
                          "request_id must be a string, got.*int")

        # Missing ID.
        bad_request = make_request()
        bad_request.request_id = None
        expect_type_error(bad_request,
                          "request_id must be a string, got.*NoneType")

        # The failed submissions must not have broken the engine: a valid
        # request is still accepted and lands in the waiting queue.
        engine_core.add_request(make_request())
        scheduler = engine_core.scheduler
        assert len(scheduler.waiting) == 1
        assert len(scheduler.running) == 0
0 commit comments