@@ -409,9 +409,7 @@ class EngineArgs:
409
409
get_field (LoadConfig , "model_loader_extra_config" )
410
410
ignore_patterns : Optional [Union [str ,
411
411
List [str ]]] = LoadConfig .ignore_patterns
412
- preemption_mode : Optional [str ] = SchedulerConfig .preemption_mode
413
412
414
- scheduler_delay_factor : float = SchedulerConfig .delay_factor
415
413
enable_chunked_prefill : Optional [
416
414
bool ] = SchedulerConfig .enable_chunked_prefill
417
415
disable_chunked_mm_input : bool = SchedulerConfig .disable_chunked_mm_input
@@ -439,7 +437,6 @@ class EngineArgs:
439
437
ObservabilityConfig .otlp_traces_endpoint
440
438
collect_detailed_traces : Optional [list [DetailedTraceModules ]] = \
441
439
ObservabilityConfig .collect_detailed_traces
442
- disable_async_output_proc : bool = not ModelConfig .use_async_output_proc
443
440
scheduling_policy : SchedulerPolicy = SchedulerConfig .policy
444
441
scheduler_cls : Union [str , Type [object ]] = SchedulerConfig .scheduler_cls
445
442
@@ -561,14 +558,6 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
561
558
** model_kwargs ["enable_prompt_embeds" ])
562
559
model_group .add_argument ("--served-model-name" ,
563
560
** model_kwargs ["served_model_name" ])
564
- # This one is a special case because it is the
565
- # opposite of ModelConfig.use_async_output_proc
566
- model_group .add_argument (
567
- "--disable-async-output-proc" ,
568
- action = "store_true" ,
569
- default = EngineArgs .disable_async_output_proc ,
570
- help = "Disable async output processing. This may result in "
571
- "lower performance." )
572
561
model_group .add_argument ("--config-format" ,
573
562
** model_kwargs ["config_format" ])
574
563
# This one is a special case because it can bool
@@ -897,10 +886,6 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
897
886
** scheduler_kwargs ["long_prefill_token_threshold" ])
898
887
scheduler_group .add_argument ("--num-lookahead-slots" ,
899
888
** scheduler_kwargs ["num_lookahead_slots" ])
900
- scheduler_group .add_argument ("--scheduler-delay-factor" ,
901
- ** scheduler_kwargs ["delay_factor" ])
902
- scheduler_group .add_argument ("--preemption-mode" ,
903
- ** scheduler_kwargs ["preemption_mode" ])
904
889
# multi-step scheduling has been removed; corresponding arguments
905
890
# are no longer supported.
906
891
scheduler_group .add_argument ("--scheduling-policy" ,
@@ -1029,7 +1014,6 @@ def create_model_config(self) -> ModelConfig:
1029
1014
interleave_mm_strings = self .interleave_mm_strings ,
1030
1015
media_io_kwargs = self .media_io_kwargs ,
1031
1016
skip_mm_profiling = self .skip_mm_profiling ,
1032
- use_async_output_proc = not self .disable_async_output_proc ,
1033
1017
config_format = self .config_format ,
1034
1018
mm_processor_kwargs = self .mm_processor_kwargs ,
1035
1019
mm_processor_cache_gb = self .mm_processor_cache_gb ,
@@ -1395,11 +1379,9 @@ def create_engine_config(
1395
1379
max_model_len = model_config .max_model_len ,
1396
1380
cuda_graph_sizes = self .cuda_graph_sizes ,
1397
1381
num_lookahead_slots = num_lookahead_slots ,
1398
- delay_factor = self .scheduler_delay_factor ,
1399
1382
enable_chunked_prefill = self .enable_chunked_prefill ,
1400
1383
disable_chunked_mm_input = self .disable_chunked_mm_input ,
1401
1384
is_multimodal_model = model_config .is_multimodal_model ,
1402
- preemption_mode = self .preemption_mode ,
1403
1385
send_delta_data = (envs .VLLM_USE_RAY_SPMD_WORKER
1404
1386
and parallel_config .use_ray ),
1405
1387
policy = self .scheduling_policy ,
@@ -1492,22 +1474,6 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
1492
1474
recommend_to_remove = False )
1493
1475
return False
1494
1476
1495
- if self .preemption_mode != SchedulerConfig .preemption_mode :
1496
- _raise_or_fallback (feature_name = "--preemption-mode" ,
1497
- recommend_to_remove = True )
1498
- return False
1499
-
1500
- if (self .disable_async_output_proc
1501
- != EngineArgs .disable_async_output_proc ):
1502
- _raise_or_fallback (feature_name = "--disable-async-output-proc" ,
1503
- recommend_to_remove = True )
1504
- return False
1505
-
1506
- if self .scheduler_delay_factor != SchedulerConfig .delay_factor :
1507
- _raise_or_fallback (feature_name = "--scheduler-delay-factor" ,
1508
- recommend_to_remove = True )
1509
- return False
1510
-
1511
1477
# No Mamba or Encoder-Decoder so far.
1512
1478
if not model_config .is_v1_compatible :
1513
1479
_raise_or_fallback (feature_name = model_config .architectures ,
0 commit comments