
Commit 7e494e9

[CI] Fix broken ci (#2530)
vLLM commit vllm-project/vllm#22711 changed the encoder cache entries logic; this PR adapts the same change for vllm-ascend to make CI happy.

Co-authored-by: zhoux77899 <[email protected]>
vLLM version: v0.10.1.1
vLLM main: vllm-project/vllm@0ff902f
Signed-off-by: wangxiyuan <[email protected]>
1 parent 99bf25a commit 7e494e9
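
The fix applies one pattern at every SchedulerOutput construction site: newer vLLM renamed the free_encoder_input_ids field to free_encoder_mm_hashes, since freed encoder-cache entries are now reported as multimodal hashes rather than input ids, so each site is gated on the installed vLLM version. Below is a minimal sketch of that gate, not code from this PR: it assumes the vllm_version_is helper from vllm_ascend.utils and vLLM v1's SchedulerOutput import path, and borrows the encoder-cache accessor names from the scheduler diff further down.

from vllm.v1.core.sched.output import SchedulerOutput

from vllm_ascend.utils import vllm_version_is


def make_scheduler_output(encoder_cache_manager, **common):
    # Fields identical across both versions are passed through **common.
    if vllm_version_is("0.10.1.1"):
        # v0.10.1.1 reports freed encoder cache entries as input ids.
        return SchedulerOutput(
            free_encoder_input_ids=encoder_cache_manager.get_freed_ids(),
            **common)
    # Newer vLLM reports freed entries as multimodal hashes.
    return SchedulerOutput(
        free_encoder_mm_hashes=encoder_cache_manager.get_freed_mm_hashes(),
        **common)

The test hunks below express the same gate with literal arguments (an empty list for the freed entries) in both branches.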

File tree

5 files changed, +257 -124 lines changed

tests/ut/core/test_scheduler.py

Lines changed: 136 additions & 69 deletions
@@ -295,24 +295,25 @@ def test_stop_via_update_from_output(self):
         scheduler.running.append(req)
         req.status = RequestStatus.RUNNING
 
-        scheduler_output = SchedulerOutput(scheduled_new_reqs=[],
-                                           scheduled_cached_reqs=[],
-                                           num_scheduled_tokens={
-                                               requests[0].request_id: 1,
-                                               requests[1].request_id: 2
-                                           },
-                                           total_num_scheduled_tokens=3,
-                                           scheduled_encoder_inputs={},
-                                           scheduled_spec_decode_tokens={
-                                               requests[0].request_id: [],
-                                               requests[1].request_id: [10]
-                                           },
-                                           num_common_prefix_blocks=0,
-                                           finished_req_ids=set(),
-                                           free_encoder_input_ids=[],
-                                           structured_output_request_ids={},
-                                           grammar_bitmask=None)
         if vllm_version_is("0.10.1.1"):
+            scheduler_output = SchedulerOutput(
+                scheduled_new_reqs=[],
+                scheduled_cached_reqs=[],
+                num_scheduled_tokens={
+                    requests[0].request_id: 1,
+                    requests[1].request_id: 2
+                },
+                total_num_scheduled_tokens=3,
+                scheduled_encoder_inputs={},
+                scheduled_spec_decode_tokens={
+                    requests[0].request_id: [],
+                    requests[1].request_id: [10]
+                },
+                num_common_prefix_blocks=0,
+                finished_req_ids=set(),
+                free_encoder_input_ids=[],
+                structured_output_request_ids={},
+                grammar_bitmask=None)
             model_output = ModelRunnerOutput(
                 req_ids=[req.request_id for req in requests],
                 req_id_to_index={
@@ -327,6 +328,24 @@ def test_stop_via_update_from_output(self):
                 prompt_logprobs_dict={},
                 pooler_output=[])
         else:
+            scheduler_output = SchedulerOutput(
+                scheduled_new_reqs=[],
+                scheduled_cached_reqs=[],
+                num_scheduled_tokens={
+                    requests[0].request_id: 1,
+                    requests[1].request_id: 2
+                },
+                total_num_scheduled_tokens=3,
+                scheduled_encoder_inputs={},
+                scheduled_spec_decode_tokens={
+                    requests[0].request_id: [],
+                    requests[1].request_id: [10]
+                },
+                num_common_prefix_blocks=0,
+                finished_req_ids=set(),
+                free_encoder_mm_hashes=[],
+                structured_output_request_ids={},
+                grammar_bitmask=None)
             model_output = ModelRunnerOutput(
                 req_ids=[req.request_id for req in requests],
                 req_id_to_index={
@@ -363,25 +382,25 @@ def test_stop_via_update_from_output(self):
         scheduler.running.append(req)
         req.status = RequestStatus.RUNNING
 
-        scheduler_output = SchedulerOutput(scheduled_new_reqs=[],
-                                           scheduled_cached_reqs=[],
-                                           num_scheduled_tokens={
-                                               requests[0].request_id: 3,
-                                               requests[1].request_id: 2
-                                           },
-                                           total_num_scheduled_tokens=5,
-                                           scheduled_encoder_inputs={},
-                                           scheduled_spec_decode_tokens={
-                                               requests[0].request_id:
-                                               [10, 42],
-                                               requests[1].request_id: [13]
-                                           },
-                                           num_common_prefix_blocks=0,
-                                           finished_req_ids=set(),
-                                           free_encoder_input_ids=[],
-                                           structured_output_request_ids={},
-                                           grammar_bitmask=None)
         if vllm_version_is("0.10.1.1"):
+            scheduler_output = SchedulerOutput(
+                scheduled_new_reqs=[],
+                scheduled_cached_reqs=[],
+                num_scheduled_tokens={
+                    requests[0].request_id: 3,
+                    requests[1].request_id: 2
+                },
+                total_num_scheduled_tokens=5,
+                scheduled_encoder_inputs={},
+                scheduled_spec_decode_tokens={
+                    requests[0].request_id: [10, 42],
+                    requests[1].request_id: [13]
+                },
+                num_common_prefix_blocks=0,
+                finished_req_ids=set(),
+                free_encoder_input_ids=[],
+                structured_output_request_ids={},
+                grammar_bitmask=None)
             model_output = ModelRunnerOutput(
                 req_ids=[req.request_id for req in requests],
                 req_id_to_index={
@@ -395,6 +414,24 @@ def test_stop_via_update_from_output(self):
                 prompt_logprobs_dict={},
                 pooler_output=[])
         else:
+            scheduler_output = SchedulerOutput(
+                scheduled_new_reqs=[],
+                scheduled_cached_reqs=[],
+                num_scheduled_tokens={
+                    requests[0].request_id: 3,
+                    requests[1].request_id: 2
+                },
+                total_num_scheduled_tokens=5,
+                scheduled_encoder_inputs={},
+                scheduled_spec_decode_tokens={
+                    requests[0].request_id: [10, 42],
+                    requests[1].request_id: [13]
+                },
+                num_common_prefix_blocks=0,
+                finished_req_ids=set(),
+                free_encoder_mm_hashes=[],
+                structured_output_request_ids={},
+                grammar_bitmask=None)
             model_output = ModelRunnerOutput(
                 req_ids=[req.request_id for req in requests],
                 req_id_to_index={
@@ -429,26 +466,25 @@ def test_stop_via_update_from_output(self):
         scheduler.running.append(req)
         req.status = RequestStatus.RUNNING
 
-        scheduler_output = SchedulerOutput(scheduled_new_reqs=[],
-                                           scheduled_cached_reqs=[],
-                                           num_scheduled_tokens={
-                                               requests[0].request_id: 3,
-                                               requests[1].request_id: 1
-                                           },
-                                           total_num_scheduled_tokens=4,
-                                           scheduled_encoder_inputs={},
-                                           scheduled_spec_decode_tokens={
-                                               requests[0].request_id:
-                                               [10, 11],
-                                               requests[1].request_id: []
-                                           },
-                                           num_common_prefix_blocks=0,
-                                           finished_req_ids=set(),
-                                           free_encoder_input_ids=[],
-                                           structured_output_request_ids={},
-                                           grammar_bitmask=None)
-
         if vllm_version_is("0.10.1.1"):
+            scheduler_output = SchedulerOutput(
+                scheduled_new_reqs=[],
+                scheduled_cached_reqs=[],
+                num_scheduled_tokens={
+                    requests[0].request_id: 3,
+                    requests[1].request_id: 1
+                },
+                total_num_scheduled_tokens=4,
+                scheduled_encoder_inputs={},
+                scheduled_spec_decode_tokens={
+                    requests[0].request_id: [10, 11],
+                    requests[1].request_id: []
+                },
+                num_common_prefix_blocks=0,
+                finished_req_ids=set(),
+                free_encoder_input_ids=[],
+                structured_output_request_ids={},
+                grammar_bitmask=None)
             model_output = ModelRunnerOutput(
                 req_ids=[req.request_id for req in requests],
                 req_id_to_index={
@@ -462,6 +498,24 @@ def test_stop_via_update_from_output(self):
                 prompt_logprobs_dict={},
                 pooler_output=[])
         else:
+            scheduler_output = SchedulerOutput(
+                scheduled_new_reqs=[],
+                scheduled_cached_reqs=[],
+                num_scheduled_tokens={
+                    requests[0].request_id: 3,
+                    requests[1].request_id: 1
+                },
+                total_num_scheduled_tokens=4,
+                scheduled_encoder_inputs={},
+                scheduled_spec_decode_tokens={
+                    requests[0].request_id: [10, 11],
+                    requests[1].request_id: []
+                },
+                num_common_prefix_blocks=0,
+                finished_req_ids=set(),
+                free_encoder_mm_hashes=[],
+                structured_output_request_ids={},
+                grammar_bitmask=None)
             model_output = ModelRunnerOutput(
                 req_ids=[req.request_id for req in requests],
                 req_id_to_index={
@@ -493,22 +547,21 @@ def test_stop_via_update_from_output(self):
         scheduler.requests[requests[0].request_id] = requests[0]
         scheduler.running.append(requests[0])
 
-        scheduler_output = SchedulerOutput(
-            scheduled_new_reqs=[],
-            scheduled_cached_reqs=[],
-            num_scheduled_tokens={requests[0].request_id: 3},
-            total_num_scheduled_tokens=3,
-            scheduled_encoder_inputs={},
-            scheduled_spec_decode_tokens={
-                requests[0].request_id: [EOS_TOKEN_ID, 10]
-            },
-            num_common_prefix_blocks=0,
-            finished_req_ids=set(),
-            free_encoder_input_ids=[],
-            structured_output_request_ids={},
-            grammar_bitmask=None)
-
         if vllm_version_is("0.10.1.1"):
+            scheduler_output = SchedulerOutput(
+                scheduled_new_reqs=[],
+                scheduled_cached_reqs=[],
+                num_scheduled_tokens={requests[0].request_id: 3},
+                total_num_scheduled_tokens=3,
+                scheduled_encoder_inputs={},
+                scheduled_spec_decode_tokens={
+                    requests[0].request_id: [EOS_TOKEN_ID, 10]
+                },
+                num_common_prefix_blocks=0,
+                finished_req_ids=set(),
+                free_encoder_input_ids=[],
+                structured_output_request_ids={},
+                grammar_bitmask=None)
             model_output = ModelRunnerOutput(
                 req_ids=[requests[0].request_id],
                 req_id_to_index={requests[0].request_id: 0},
@@ -519,6 +572,20 @@ def test_stop_via_update_from_output(self):
                 pooler_output=[])
 
         else:
+            scheduler_output = SchedulerOutput(
+                scheduled_new_reqs=[],
+                scheduled_cached_reqs=[],
+                num_scheduled_tokens={requests[0].request_id: 3},
+                total_num_scheduled_tokens=3,
+                scheduled_encoder_inputs={},
+                scheduled_spec_decode_tokens={
+                    requests[0].request_id: [EOS_TOKEN_ID, 10]
+                },
+                num_common_prefix_blocks=0,
+                finished_req_ids=set(),
+                free_encoder_mm_hashes=[],
+                structured_output_request_ids={},
+                grammar_bitmask=None)
             model_output = ModelRunnerOutput(
                 req_ids=[requests[0].request_id],
                 req_id_to_index={requests[0].request_id: 0},

tests/ut/worker/test_input_batch.py

Lines changed: 1 addition & 0 deletions
@@ -215,6 +215,7 @@ def _construct_cached_request_state(req_id_suffix: int):
         generator=None,
         num_computed_tokens=len(output_token_ids),
         output_token_ids=output_token_ids,
+        mm_hashes=None,
     )
 
 

vllm_ascend/core/scheduler.py

Lines changed: 38 additions & 17 deletions
@@ -385,23 +385,44 @@ def skip_cur_request():
             req_to_new_blocks)
         scheduled_cached_reqs = cached_reqs_data
 
-        scheduler_output = SchedulerOutput(
-            scheduled_new_reqs=new_reqs_data,
-            scheduled_cached_reqs=scheduled_cached_reqs,
-            num_scheduled_tokens=num_scheduled_tokens,
-            total_num_scheduled_tokens=total_num_scheduled_tokens,
-            scheduled_spec_decode_tokens=scheduled_spec_decode_tokens,
-            scheduled_encoder_inputs={},
-            num_common_prefix_blocks=num_common_prefix_blocks,
-            # finished_req_ids is an existing state in the scheduler,
-            # instead of being newly scheduled in this step.
-            # It contains the request IDs that are finished in between
-            # the previous and the current steps.
-            finished_req_ids=self.finished_req_ids,  # type: ignore
-            free_encoder_input_ids=self.encoder_cache_manager.get_freed_ids(),
-            structured_output_request_ids={},
-            grammar_bitmask=None,
-        )
+        if vllm_version_is("0.10.1.1"):
+            scheduler_output = SchedulerOutput(
+                scheduled_new_reqs=new_reqs_data,
+                scheduled_cached_reqs=scheduled_cached_reqs,
+                num_scheduled_tokens=num_scheduled_tokens,
+                total_num_scheduled_tokens=total_num_scheduled_tokens,
+                scheduled_spec_decode_tokens=scheduled_spec_decode_tokens,
+                scheduled_encoder_inputs={},
+                num_common_prefix_blocks=num_common_prefix_blocks,
+                # finished_req_ids is an existing state in the scheduler,
+                # instead of being newly scheduled in this step.
+                # It contains the request IDs that are finished in between
+                # the previous and the current steps.
+                finished_req_ids=self.finished_req_ids,  # type: ignore
+                free_encoder_input_ids=self.encoder_cache_manager.
+                get_freed_ids(),
+                structured_output_request_ids={},
+                grammar_bitmask=None,
+            )
+        else:
+            scheduler_output = SchedulerOutput(
+                scheduled_new_reqs=new_reqs_data,
+                scheduled_cached_reqs=scheduled_cached_reqs,
+                num_scheduled_tokens=num_scheduled_tokens,
+                total_num_scheduled_tokens=total_num_scheduled_tokens,
+                scheduled_spec_decode_tokens=scheduled_spec_decode_tokens,
+                scheduled_encoder_inputs={},
+                num_common_prefix_blocks=num_common_prefix_blocks,
+                # finished_req_ids is an existing state in the scheduler,
+                # instead of being newly scheduled in this step.
+                # It contains the request IDs that are finished in between
+                # the previous and the current steps.
+                finished_req_ids=self.finished_req_ids,  # type: ignore
+                free_encoder_mm_hashes=self.encoder_cache_manager.
+                get_freed_mm_hashes(),
+                structured_output_request_ids={},
+                grammar_bitmask=None,
+            )
 
         # NOTE(Kuntai): this function is designed for multiple purposes:
         # 1. Plan the KV cache store
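
Side note on the shape of the fix: the two branches above repeat every shared keyword argument and differ only in the freed-encoder field. A hypothetical, more compact variant (not what this PR does) would build the shared kwargs once at the same point in schedule() and attach only the key the installed version expects:

# Hypothetical refactor, not the PR's code: collect the kwargs that are
# identical across both vLLM versions, then set whichever freed-encoder
# field the installed version's SchedulerOutput accepts.
common_kwargs = dict(
    scheduled_new_reqs=new_reqs_data,
    scheduled_cached_reqs=scheduled_cached_reqs,
    num_scheduled_tokens=num_scheduled_tokens,
    total_num_scheduled_tokens=total_num_scheduled_tokens,
    scheduled_spec_decode_tokens=scheduled_spec_decode_tokens,
    scheduled_encoder_inputs={},
    num_common_prefix_blocks=num_common_prefix_blocks,
    # finished_req_ids is existing scheduler state: the requests that
    # finished between the previous and the current steps.
    finished_req_ids=self.finished_req_ids,
    structured_output_request_ids={},
    grammar_bitmask=None,
)
if vllm_version_is("0.10.1.1"):
    common_kwargs["free_encoder_input_ids"] = (
        self.encoder_cache_manager.get_freed_ids())
else:
    common_kwargs["free_encoder_mm_hashes"] = (
        self.encoder_cache_manager.get_freed_mm_hashes())
scheduler_output = SchedulerOutput(**common_kwargs)

The PR instead spells out both constructor calls in full, which is more verbose but keeps each branch directly comparable with the matching vLLM version's signature.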
