Skip to content

Commit 45c4e86

Browse files
chi2liu and chiliu
authored and committed
[Bugfix] Fix TypeError in scheduler when comparing mixed request_id types (vllm-project#21816)
Signed-off-by: chiliu <[email protected]> Co-authored-by: chiliu <[email protected]>
1 parent c8102b1 commit 45c4e86

File tree

2 files changed

+64
-13
lines changed

2 files changed

+64
-13
lines changed

tests/v1/engine/test_engine_core.py

Lines changed: 59 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ def test_engine_core_concurrent_batches(monkeypatch: pytest.MonkeyPatch):
236236
Test that the engine can handle multiple concurrent batches.
237237
"""
238238

239-
def make_request_with_max_tokens(req_id: int,
239+
def make_request_with_max_tokens(req_id: str,
240240
max_tokens: int) -> EngineCoreRequest:
241241
request = make_request()
242242
request.request_id = req_id
@@ -297,16 +297,16 @@ def shutdown(self):
297297
assert engine_core.batch_queue is not None
298298

299299
# Add two requests in a row. Each request have 12 prompt tokens.
300-
req0 = make_request_with_max_tokens(0, 5)
300+
req0 = make_request_with_max_tokens("0", 5)
301301
engine_core.add_request(req0)
302-
req1 = make_request_with_max_tokens(1, 5)
302+
req1 = make_request_with_max_tokens("1", 5)
303303
engine_core.add_request(req1)
304304

305305
# Schedule Batch 1: (10, req0)
306306
assert engine_core.step_with_batch_queue()[0] is None
307307
assert engine_core.batch_queue.qsize() == 1
308308
scheduler_output = engine_core.batch_queue.queue[-1][1]
309-
assert scheduler_output.num_scheduled_tokens[0] == 10
309+
assert scheduler_output.num_scheduled_tokens["0"] == 10
310310
# num_computed_tokens should have been updated immediately.
311311
assert engine_core.scheduler.requests[
312312
req0.request_id].num_computed_tokens == 10
@@ -315,11 +315,11 @@ def shutdown(self):
315315
assert engine_core.step_with_batch_queue()[0] is None
316316
assert engine_core.batch_queue.qsize() == 2
317317
scheduler_output = engine_core.batch_queue.queue[-1][1]
318-
assert scheduler_output.num_scheduled_tokens[0] == 2
319-
assert scheduler_output.num_scheduled_tokens[1] == 8
318+
assert scheduler_output.num_scheduled_tokens["0"] == 2
319+
assert scheduler_output.num_scheduled_tokens["1"] == 8
320320
# num_computed_tokens should have been updated immediately.
321-
assert engine_core.scheduler.requests[0].num_computed_tokens == 12
322-
assert engine_core.scheduler.requests[1].num_computed_tokens == 8
321+
assert engine_core.scheduler.requests["0"].num_computed_tokens == 12
322+
assert engine_core.scheduler.requests["1"].num_computed_tokens == 8
323323

324324
assert engine_core.scheduler.get_num_unfinished_requests() == 2
325325

@@ -331,7 +331,7 @@ def shutdown(self):
331331
engine_core.step_with_batch_queue()
332332
assert engine_core.batch_queue.qsize() == 2
333333
scheduler_output = engine_core.batch_queue.queue[-1][1]
334-
assert scheduler_output.num_scheduled_tokens[1] == 4
334+
assert scheduler_output.num_scheduled_tokens["1"] == 4
335335

336336
# Batch queue is full. Finish Batch 2. Get first token of req0.
337337
output = engine_core.step_with_batch_queue()[0].get(0)
@@ -343,7 +343,7 @@ def shutdown(self):
343343
engine_core.step_with_batch_queue()
344344
assert engine_core.batch_queue.qsize() == 2
345345
scheduler_output = engine_core.batch_queue.queue[-1][1]
346-
assert scheduler_output.num_scheduled_tokens[0] == 1
346+
assert scheduler_output.num_scheduled_tokens["0"] == 1
347347

348348
# Batch queue is full. Finish Batch 3. Get first token of req1.
349349
output = engine_core.step_with_batch_queue()[0].get(0)
@@ -355,14 +355,14 @@ def shutdown(self):
355355
engine_core.step_with_batch_queue()
356356
assert engine_core.batch_queue.qsize() == 2
357357
scheduler_output = engine_core.batch_queue.queue[-1][1]
358-
assert scheduler_output.num_scheduled_tokens[1] == 1
358+
assert scheduler_output.num_scheduled_tokens["1"] == 1
359359

360360
# Loop until req0 is finished.
361361
step = 0
362362
req_id = 0
363363
expected_num_tokens = [
364-
engine_core.scheduler.requests[0].num_tokens + 1,
365-
engine_core.scheduler.requests[1].num_tokens + 1,
364+
engine_core.scheduler.requests["0"].num_tokens + 1,
365+
engine_core.scheduler.requests["1"].num_tokens + 1,
366366
]
367367
while engine_core.scheduler.get_num_unfinished_requests() == 2:
368368
output = engine_core.step_with_batch_queue()[0]
@@ -413,3 +413,49 @@ def get_worker_cache_config_field(worker, key: str):
413413
get_worker_cache_config_field, args=("num_cpu_blocks", ))
414414
assert all(x is not None for x in num_gpu_blocks)
415415
assert all(x is not None for x in num_cpu_blocks)
416+
417+
418+
@create_new_process_for_each_test()
def test_engine_core_invalid_request_id_type(monkeypatch: pytest.MonkeyPatch):
    """Test that engine raises TypeError for non-string request_id."""
    with monkeypatch.context() as m:
        m.setenv("VLLM_USE_V1", "1")

        engine_args = EngineArgs(model=MODEL_NAME)
        vllm_config = engine_args.create_engine_config()
        executor_class = Executor.get_class(vllm_config)

        with set_default_torch_num_threads(1):
            engine_core = EngineCore(vllm_config=vllm_config,
                                     executor_class=executor_class,
                                     log_stats=True)

        # Each pair is an invalid request_id value together with the type
        # name fragment that the TypeError message must mention.
        invalid_ids = [
            (uuid.uuid4(), "UUID"),  # UUID object (common mistake)
            (12345, "int"),  # plain integer
            (None, "NoneType"),  # missing id entirely
        ]
        for bad_id, type_name in invalid_ids:
            bad_request = make_request()
            bad_request.request_id = bad_id
            with pytest.raises(
                    TypeError,
                    match=f"request_id must be a string, got.*{type_name}"):
                engine_core.add_request(bad_request)

        # Verify engine is still functional after errors
        valid_request = make_request()
        engine_core.add_request(valid_request)
        assert len(engine_core.scheduler.waiting) == 1
        assert len(engine_core.scheduler.running) == 0

vllm/v1/engine/core.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,11 @@ def get_supported_tasks(self) -> tuple[SupportedTask, ...]:
207207

208208
def add_request(self, request: EngineCoreRequest):
209209
"""Add request to the scheduler."""
210+
# Validate the request_id type.
211+
if not isinstance(request.request_id, str):
212+
raise TypeError(
213+
f"request_id must be a string, got {type(request.request_id)}")
214+
210215
if pooling_params := request.pooling_params:
211216
supported_pooling_tasks = [
212217
task for task in self.get_supported_tasks()

0 commit comments

Comments (0)