Skip to content

Commit bc8a8ce

Browse files
authored
[Misc] Remove deprecated args in v0.10 (#21349)
Signed-off-by: Kebe <[email protected]>
1 parent 32142b3 commit bc8a8ce

File tree

4 files changed

+0
-25
lines changed

4 files changed

+0
-25
lines changed

examples/offline_inference/neuron_speculation.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ def initialize_llm():
3737
max_num_seqs=4,
3838
max_model_len=2048,
3939
block_size=2048,
40-
use_v2_block_manager=True,
4140
device="neuron",
4241
tensor_parallel_size=32,
4342
)

tests/neuron/2_core/test_mistral.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ def test_mistral():
99
tensor_parallel_size=2,
1010
max_num_seqs=4,
1111
max_model_len=128,
12-
use_v2_block_manager=True,
1312
override_neuron_config={
1413
"sequence_parallel_enabled": False,
1514
"skip_warmup": True

tests/neuron/2_core/test_multi_lora.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ def test_llama_single_lora():
1414
tensor_parallel_size=2,
1515
max_num_seqs=4,
1616
max_model_len=512,
17-
use_v2_block_manager=True,
1817
override_neuron_config={
1918
"sequence_parallel_enabled": False,
2019
"skip_warmup": True,
@@ -57,7 +56,6 @@ def test_llama_multiple_lora():
5756
tensor_parallel_size=2,
5857
max_num_seqs=4,
5958
max_model_len=512,
60-
use_v2_block_manager=True,
6159
override_neuron_config={
6260
"sequence_parallel_enabled":
6361
False,

vllm/engine/arg_utils.py

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,6 @@ class EngineArgs:
313313
CacheConfig.prefix_caching_hash_algo
314314
disable_sliding_window: bool = ModelConfig.disable_sliding_window
315315
disable_cascade_attn: bool = ModelConfig.disable_cascade_attn
316-
use_v2_block_manager: bool = True
317316
swap_space: float = CacheConfig.swap_space
318317
cpu_offload_gb: float = CacheConfig.cpu_offload_gb
319318
gpu_memory_utilization: float = CacheConfig.gpu_memory_utilization
@@ -364,7 +363,6 @@ class EngineArgs:
364363
max_prompt_adapter_token: int = \
365364
PromptAdapterConfig.max_prompt_adapter_token
366365

367-
device: Device = DeviceConfig.device
368366
num_scheduler_steps: int = SchedulerConfig.num_scheduler_steps
369367
multi_step_stream_outputs: bool = SchedulerConfig.multi_step_stream_outputs
370368
ray_workers_use_nsight: bool = ParallelConfig.ray_workers_use_nsight
@@ -745,16 +743,6 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
745743
"--max-prompt-adapter-token",
746744
**prompt_adapter_kwargs["max_prompt_adapter_token"])
747745

748-
# Device arguments
749-
device_kwargs = get_kwargs(DeviceConfig)
750-
device_group = parser.add_argument_group(
751-
title="DeviceConfig",
752-
description=DeviceConfig.__doc__,
753-
)
754-
device_group.add_argument("--device",
755-
**device_kwargs["device"],
756-
deprecated=True)
757-
758746
# Speculative arguments
759747
speculative_group = parser.add_argument_group(
760748
title="SpeculativeConfig",
@@ -856,15 +844,6 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
856844
**vllm_kwargs["additional_config"])
857845

858846
# Other arguments
859-
parser.add_argument('--use-v2-block-manager',
860-
action='store_true',
861-
default=True,
862-
deprecated=True,
863-
help='[DEPRECATED] block manager v1 has been '
864-
'removed and SelfAttnBlockSpaceManager (i.e. '
865-
'block manager v2) is now the default. '
866-
'Setting this flag to True or False'
867-
' has no effect on vLLM behavior.')
868847
parser.add_argument('--disable-log-stats',
869848
action='store_true',
870849
help='Disable logging statistics.')

0 commit comments

Comments (0)