@@ -295,24 +295,25 @@ def test_stop_via_update_from_output(self):
295
295
scheduler .running .append (req )
296
296
req .status = RequestStatus .RUNNING
297
297
298
- scheduler_output = SchedulerOutput (scheduled_new_reqs = [],
299
- scheduled_cached_reqs = [],
300
- num_scheduled_tokens = {
301
- requests [0 ].request_id : 1 ,
302
- requests [1 ].request_id : 2
303
- },
304
- total_num_scheduled_tokens = 3 ,
305
- scheduled_encoder_inputs = {},
306
- scheduled_spec_decode_tokens = {
307
- requests [0 ].request_id : [],
308
- requests [1 ].request_id : [10 ]
309
- },
310
- num_common_prefix_blocks = 0 ,
311
- finished_req_ids = set (),
312
- free_encoder_input_ids = [],
313
- structured_output_request_ids = {},
314
- grammar_bitmask = None )
315
298
if vllm_version_is ("0.10.1.1" ):
299
+ scheduler_output = SchedulerOutput (
300
+ scheduled_new_reqs = [],
301
+ scheduled_cached_reqs = [],
302
+ num_scheduled_tokens = {
303
+ requests [0 ].request_id : 1 ,
304
+ requests [1 ].request_id : 2
305
+ },
306
+ total_num_scheduled_tokens = 3 ,
307
+ scheduled_encoder_inputs = {},
308
+ scheduled_spec_decode_tokens = {
309
+ requests [0 ].request_id : [],
310
+ requests [1 ].request_id : [10 ]
311
+ },
312
+ num_common_prefix_blocks = 0 ,
313
+ finished_req_ids = set (),
314
+ free_encoder_input_ids = [],
315
+ structured_output_request_ids = {},
316
+ grammar_bitmask = None )
316
317
model_output = ModelRunnerOutput (
317
318
req_ids = [req .request_id for req in requests ],
318
319
req_id_to_index = {
@@ -327,6 +328,24 @@ def test_stop_via_update_from_output(self):
327
328
prompt_logprobs_dict = {},
328
329
pooler_output = [])
329
330
else :
331
+ scheduler_output = SchedulerOutput (
332
+ scheduled_new_reqs = [],
333
+ scheduled_cached_reqs = [],
334
+ num_scheduled_tokens = {
335
+ requests [0 ].request_id : 1 ,
336
+ requests [1 ].request_id : 2
337
+ },
338
+ total_num_scheduled_tokens = 3 ,
339
+ scheduled_encoder_inputs = {},
340
+ scheduled_spec_decode_tokens = {
341
+ requests [0 ].request_id : [],
342
+ requests [1 ].request_id : [10 ]
343
+ },
344
+ num_common_prefix_blocks = 0 ,
345
+ finished_req_ids = set (),
346
+ free_encoder_mm_hashes = [],
347
+ structured_output_request_ids = {},
348
+ grammar_bitmask = None )
330
349
model_output = ModelRunnerOutput (
331
350
req_ids = [req .request_id for req in requests ],
332
351
req_id_to_index = {
@@ -363,25 +382,25 @@ def test_stop_via_update_from_output(self):
363
382
scheduler .running .append (req )
364
383
req .status = RequestStatus .RUNNING
365
384
366
- scheduler_output = SchedulerOutput (scheduled_new_reqs = [],
367
- scheduled_cached_reqs = [],
368
- num_scheduled_tokens = {
369
- requests [0 ].request_id : 3 ,
370
- requests [1 ].request_id : 2
371
- },
372
- total_num_scheduled_tokens = 5 ,
373
- scheduled_encoder_inputs = {},
374
- scheduled_spec_decode_tokens = {
375
- requests [0 ].request_id :
376
- [10 , 42 ],
377
- requests [1 ].request_id : [13 ]
378
- },
379
- num_common_prefix_blocks = 0 ,
380
- finished_req_ids = set (),
381
- free_encoder_input_ids = [],
382
- structured_output_request_ids = {},
383
- grammar_bitmask = None )
384
385
if vllm_version_is ("0.10.1.1" ):
386
+ scheduler_output = SchedulerOutput (
387
+ scheduled_new_reqs = [],
388
+ scheduled_cached_reqs = [],
389
+ num_scheduled_tokens = {
390
+ requests [0 ].request_id : 3 ,
391
+ requests [1 ].request_id : 2
392
+ },
393
+ total_num_scheduled_tokens = 5 ,
394
+ scheduled_encoder_inputs = {},
395
+ scheduled_spec_decode_tokens = {
396
+ requests [0 ].request_id : [10 , 42 ],
397
+ requests [1 ].request_id : [13 ]
398
+ },
399
+ num_common_prefix_blocks = 0 ,
400
+ finished_req_ids = set (),
401
+ free_encoder_input_ids = [],
402
+ structured_output_request_ids = {},
403
+ grammar_bitmask = None )
385
404
model_output = ModelRunnerOutput (
386
405
req_ids = [req .request_id for req in requests ],
387
406
req_id_to_index = {
@@ -395,6 +414,24 @@ def test_stop_via_update_from_output(self):
395
414
prompt_logprobs_dict = {},
396
415
pooler_output = [])
397
416
else :
417
+ scheduler_output = SchedulerOutput (
418
+ scheduled_new_reqs = [],
419
+ scheduled_cached_reqs = [],
420
+ num_scheduled_tokens = {
421
+ requests [0 ].request_id : 3 ,
422
+ requests [1 ].request_id : 2
423
+ },
424
+ total_num_scheduled_tokens = 5 ,
425
+ scheduled_encoder_inputs = {},
426
+ scheduled_spec_decode_tokens = {
427
+ requests [0 ].request_id : [10 , 42 ],
428
+ requests [1 ].request_id : [13 ]
429
+ },
430
+ num_common_prefix_blocks = 0 ,
431
+ finished_req_ids = set (),
432
+ free_encoder_mm_hashes = [],
433
+ structured_output_request_ids = {},
434
+ grammar_bitmask = None )
398
435
model_output = ModelRunnerOutput (
399
436
req_ids = [req .request_id for req in requests ],
400
437
req_id_to_index = {
@@ -429,26 +466,25 @@ def test_stop_via_update_from_output(self):
429
466
scheduler .running .append (req )
430
467
req .status = RequestStatus .RUNNING
431
468
432
- scheduler_output = SchedulerOutput (scheduled_new_reqs = [],
433
- scheduled_cached_reqs = [],
434
- num_scheduled_tokens = {
435
- requests [0 ].request_id : 3 ,
436
- requests [1 ].request_id : 1
437
- },
438
- total_num_scheduled_tokens = 4 ,
439
- scheduled_encoder_inputs = {},
440
- scheduled_spec_decode_tokens = {
441
- requests [0 ].request_id :
442
- [10 , 11 ],
443
- requests [1 ].request_id : []
444
- },
445
- num_common_prefix_blocks = 0 ,
446
- finished_req_ids = set (),
447
- free_encoder_input_ids = [],
448
- structured_output_request_ids = {},
449
- grammar_bitmask = None )
450
-
451
469
if vllm_version_is ("0.10.1.1" ):
470
+ scheduler_output = SchedulerOutput (
471
+ scheduled_new_reqs = [],
472
+ scheduled_cached_reqs = [],
473
+ num_scheduled_tokens = {
474
+ requests [0 ].request_id : 3 ,
475
+ requests [1 ].request_id : 1
476
+ },
477
+ total_num_scheduled_tokens = 4 ,
478
+ scheduled_encoder_inputs = {},
479
+ scheduled_spec_decode_tokens = {
480
+ requests [0 ].request_id : [10 , 11 ],
481
+ requests [1 ].request_id : []
482
+ },
483
+ num_common_prefix_blocks = 0 ,
484
+ finished_req_ids = set (),
485
+ free_encoder_input_ids = [],
486
+ structured_output_request_ids = {},
487
+ grammar_bitmask = None )
452
488
model_output = ModelRunnerOutput (
453
489
req_ids = [req .request_id for req in requests ],
454
490
req_id_to_index = {
@@ -462,6 +498,24 @@ def test_stop_via_update_from_output(self):
462
498
prompt_logprobs_dict = {},
463
499
pooler_output = [])
464
500
else :
501
+ scheduler_output = SchedulerOutput (
502
+ scheduled_new_reqs = [],
503
+ scheduled_cached_reqs = [],
504
+ num_scheduled_tokens = {
505
+ requests [0 ].request_id : 3 ,
506
+ requests [1 ].request_id : 1
507
+ },
508
+ total_num_scheduled_tokens = 4 ,
509
+ scheduled_encoder_inputs = {},
510
+ scheduled_spec_decode_tokens = {
511
+ requests [0 ].request_id : [10 , 11 ],
512
+ requests [1 ].request_id : []
513
+ },
514
+ num_common_prefix_blocks = 0 ,
515
+ finished_req_ids = set (),
516
+ free_encoder_mm_hashes = [],
517
+ structured_output_request_ids = {},
518
+ grammar_bitmask = None )
465
519
model_output = ModelRunnerOutput (
466
520
req_ids = [req .request_id for req in requests ],
467
521
req_id_to_index = {
@@ -493,22 +547,21 @@ def test_stop_via_update_from_output(self):
493
547
scheduler .requests [requests [0 ].request_id ] = requests [0 ]
494
548
scheduler .running .append (requests [0 ])
495
549
496
- scheduler_output = SchedulerOutput (
497
- scheduled_new_reqs = [],
498
- scheduled_cached_reqs = [],
499
- num_scheduled_tokens = {requests [0 ].request_id : 3 },
500
- total_num_scheduled_tokens = 3 ,
501
- scheduled_encoder_inputs = {},
502
- scheduled_spec_decode_tokens = {
503
- requests [0 ].request_id : [EOS_TOKEN_ID , 10 ]
504
- },
505
- num_common_prefix_blocks = 0 ,
506
- finished_req_ids = set (),
507
- free_encoder_input_ids = [],
508
- structured_output_request_ids = {},
509
- grammar_bitmask = None )
510
-
511
550
if vllm_version_is ("0.10.1.1" ):
551
+ scheduler_output = SchedulerOutput (
552
+ scheduled_new_reqs = [],
553
+ scheduled_cached_reqs = [],
554
+ num_scheduled_tokens = {requests [0 ].request_id : 3 },
555
+ total_num_scheduled_tokens = 3 ,
556
+ scheduled_encoder_inputs = {},
557
+ scheduled_spec_decode_tokens = {
558
+ requests [0 ].request_id : [EOS_TOKEN_ID , 10 ]
559
+ },
560
+ num_common_prefix_blocks = 0 ,
561
+ finished_req_ids = set (),
562
+ free_encoder_input_ids = [],
563
+ structured_output_request_ids = {},
564
+ grammar_bitmask = None )
512
565
model_output = ModelRunnerOutput (
513
566
req_ids = [requests [0 ].request_id ],
514
567
req_id_to_index = {requests [0 ].request_id : 0 },
@@ -519,6 +572,20 @@ def test_stop_via_update_from_output(self):
519
572
pooler_output = [])
520
573
521
574
else :
575
+ scheduler_output = SchedulerOutput (
576
+ scheduled_new_reqs = [],
577
+ scheduled_cached_reqs = [],
578
+ num_scheduled_tokens = {requests [0 ].request_id : 3 },
579
+ total_num_scheduled_tokens = 3 ,
580
+ scheduled_encoder_inputs = {},
581
+ scheduled_spec_decode_tokens = {
582
+ requests [0 ].request_id : [EOS_TOKEN_ID , 10 ]
583
+ },
584
+ num_common_prefix_blocks = 0 ,
585
+ finished_req_ids = set (),
586
+ free_encoder_mm_hashes = [],
587
+ structured_output_request_ids = {},
588
+ grammar_bitmask = None )
522
589
model_output = ModelRunnerOutput (
523
590
req_ids = [requests [0 ].request_id ],
524
591
req_id_to_index = {requests [0 ].request_id : 0 },
0 commit comments