
Commit 1eaff27

[V0 deprecation] Remove long context LoRA (#21169)
Signed-off-by: Jee Jee Li <[email protected]>
Parent: cf8cc32 · Commit: 1eaff27

13 files changed: +35 -301 lines

tests/lora/conftest.py

Lines changed: 0 additions & 5 deletions

@@ -221,11 +221,6 @@ def phi2_lora_files():
     return snapshot_download(repo_id="isotr0py/phi-2-test-sql-lora")
 
 
-@pytest.fixture(scope="session")
-def long_context_lora_files_16k_1():
-    return snapshot_download(repo_id="SangBinCho/long_context_16k_testing_1")
-
-
 @pytest.fixture
 def llama_2_7b_engine_extra_embeddings():
     cleanup_dist_env_and_memory(shutdown_ray=True)

tests/lora/test_peft_helper.py

Lines changed: 4 additions & 7 deletions

@@ -38,8 +38,8 @@
 ]
 
 
-def test_peft_helper_pass(long_context_lora_files_16k_1, tmp_path):
-    peft_helper = PEFTHelper.from_local_dir(long_context_lora_files_16k_1,
+def test_peft_helper_pass(sql_lora_files, tmp_path):
+    peft_helper = PEFTHelper.from_local_dir(sql_lora_files,
                                             max_position_embeddings=4096)
     lora_config = LoRAConfig(max_lora_rank=16, max_cpu_loras=3, max_loras=2)
     peft_helper.validate_legal(lora_config)
@@ -56,15 +56,12 @@ def test_peft_helper_pass(long_context_lora_files_16k_1, tmp_path):
         "embed_tokens",
         "lm_head",
     ]
-    assert peft_helper.context_length == 16384
     assert peft_helper.vllm_max_position_embeddings == 4096
-    assert peft_helper.vllm_long_context_scaling_factor == float(
-        math.ceil(peft_helper.context_length /
-                  peft_helper.vllm_max_position_embeddings))
+
     # test RSLoRA
     rslora_config = dict(use_rslora=True)
     test_dir = tmp_path / "test_rslora"
-    shutil.copytree(long_context_lora_files_16k_1, test_dir)
+    shutil.copytree(sql_lora_files, test_dir)
 
     # Load and modify configuration
     config_path = test_dir / "adapter_config.json"
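
The deleted assertions are the only place in this hunk where the long-context bookkeeping is visible. For reference, a minimal sketch of the relation they checked: the long-context scaling factor was the adapter's context length divided by the base model's maximum position embeddings, rounded up. The 16384 and 4096 values come from the removed 16k test adapter and the max_position_embeddings=4096 argument above; this is illustrative only and is not part of the updated test.

import math

# Values from the removed test: a 16k-context adapter on a 4096-position base.
context_length = 16384
vllm_max_position_embeddings = 4096

# The deleted assertion checked exactly this ceiling division.
long_context_scaling_factor = float(
    math.ceil(context_length / vllm_max_position_embeddings))
assert long_context_scaling_factor == 4.0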

vllm/config.py

Lines changed: 1 addition & 13 deletions

@@ -3014,12 +3014,7 @@ class LoRAConfig:
     (added to the base model vocabulary)."""
     lora_vocab_padding_size: ClassVar[int] = current_platform\
         .get_lora_vocab_padding_size()
-    long_lora_scaling_factors: Optional[tuple[float, ...]] = None
-    """Specify multiple scaling factors (which can be different from base model
-    scaling factor - see eg. Long LoRA) to allow for multiple LoRA adapters
-    trained with those scaling factors to be used at the same time. If not
-    specified, only adapters trained with the base model scaling factor are
-    allowed."""
+
     default_mm_loras: Optional[dict[str, str]] = None
     """Dictionary mapping specific modalities to LoRA model paths; this field
     is only applicable to multimodal models and should be leveraged when a
@@ -3052,7 +3047,6 @@ def compute_hash(self) -> str:
         factors.append(self.lora_dtype)
         factors.append(self.lora_extra_vocab_size)
         factors.append(self.lora_vocab_padding_size)
-        factors.append(self.long_lora_scaling_factors)
         factors.append(self.bias_enabled)
         hash_str = hashlib.md5(str(factors).encode(),
                                usedforsecurity=False).hexdigest()
@@ -3091,11 +3085,6 @@ def verify_with_model_config(self, model_config: ModelConfig):
         elif isinstance(self.lora_dtype, str):
             self.lora_dtype = getattr(torch, self.lora_dtype)
 
-    def verify_lora_support(self):
-        if self.long_lora_scaling_factors is not None and envs.VLLM_USE_V1:
-            raise ValueError(
-                "V1 LoRA does not support long LoRA, please use V0.")
-
 
 @config
 @dataclass(config=ConfigDict(arbitrary_types_allowed=True))
@@ -4564,7 +4553,6 @@ def __post_init__(self):
         if self.lora_config is not None:
             self.lora_config.verify_with_cache_config(self.cache_config)
             self.lora_config.verify_with_model_config(self.model_config)
-            self.lora_config.verify_lora_support()
         if self.prompt_adapter_config is not None:
             self.prompt_adapter_config.verify_with_model_config(
                 self.model_config)
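
For code that constructs LoRAConfig directly, the practical effect is that the long_lora_scaling_factors keyword no longer exists while the other fields are untouched. A minimal sketch, assuming vLLM with this commit is installed and using only constructor arguments that already appear in the test diff above:

from vllm.config import LoRAConfig

# Still valid: the remaining LoRA fields are unchanged (same call as the test).
lora_config = LoRAConfig(max_lora_rank=16, max_cpu_loras=3, max_loras=2)

# No longer accepted after this commit: the field was removed, so passing it
# is rejected at construction time (the exact exception type depends on the
# dataclass machinery; TypeError for a plain dataclass).
try:
    LoRAConfig(max_lora_rank=16, long_lora_scaling_factors=(4.0, 8.0))
except (TypeError, ValueError) as exc:
    print(f"rejected as expected: {exc}")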

vllm/engine/arg_utils.py

Lines changed: 0 additions & 5 deletions

@@ -358,8 +358,6 @@ class EngineArgs:
     max_cpu_loras: Optional[int] = LoRAConfig.max_cpu_loras
     lora_dtype: Optional[Union[str, torch.dtype]] = LoRAConfig.lora_dtype
     lora_extra_vocab_size: int = LoRAConfig.lora_extra_vocab_size
-    long_lora_scaling_factors: Optional[tuple[float, ...]] = \
-        LoRAConfig.long_lora_scaling_factors
     # PromptAdapter fields
     enable_prompt_adapter: bool = False
     max_prompt_adapters: int = PromptAdapterConfig.max_prompt_adapters
@@ -723,8 +721,6 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             "--lora-dtype",
             **lora_kwargs["lora_dtype"],
         )
-        lora_group.add_argument("--long-lora-scaling-factors",
-                                **lora_kwargs["long_lora_scaling_factors"])
         lora_group.add_argument("--max-cpu-loras",
                                 **lora_kwargs["max_cpu_loras"])
         lora_group.add_argument("--fully-sharded-loras",
@@ -1245,7 +1241,6 @@ def create_engine_config(
             default_mm_loras=self.default_mm_loras,
             fully_sharded_loras=self.fully_sharded_loras,
             lora_extra_vocab_size=self.lora_extra_vocab_size,
-            long_lora_scaling_factors=self.long_lora_scaling_factors,
             lora_dtype=self.lora_dtype,
             max_cpu_loras=self.max_cpu_loras if self.max_cpu_loras
             and self.max_cpu_loras > 0 else None) if self.enable_lora else None
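
On the EngineArgs side the change is symmetrical: the dataclass field and its --long-lora-scaling-factors CLI flag are gone, and nothing replaces them. A small hedged check, using only field names visible in this diff (EngineArgs is a dataclass, so dataclasses.fields can enumerate them):

from dataclasses import fields

from vllm.engine.arg_utils import EngineArgs

field_names = {f.name for f in fields(EngineArgs)}

# Removed by this commit, together with the --long-lora-scaling-factors flag.
assert "long_lora_scaling_factors" not in field_names

# The neighbouring LoRA knobs from the hunk above are still present.
assert {"max_cpu_loras", "lora_dtype", "lora_extra_vocab_size"} <= field_names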

vllm/lora/layers.py

Lines changed: 0 additions & 90 deletions

@@ -28,8 +28,6 @@
                                                RowParallelLinear)
 # yapf: enable
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
-from vllm.model_executor.layers.rotary_embedding import (
-    LinearScalingRotaryEmbedding, RotaryEmbedding)
 from vllm.model_executor.layers.vocab_parallel_embedding import (
     VocabParallelEmbedding)
 from vllm.platforms import current_platform
@@ -1193,91 +1191,3 @@ def can_replace_layer(
     ) -> bool:
         # Special handling for the LogitsProcessor.
         return False
-
-
-class LinearScalingRotaryEmbeddingWithLoRA(BaseLayerWithLoRA):
-    """Implements RoPE-scaled embeddings with linear scaling for
-    multiple LoRA adapters with a specialized kernel.
-
-    Replace LinearScalingRotaryEmbedding with MultiLinearScalingRotaryEmbedding
-    which can handle multi lora adapters in a specialized kernel.
-    """
-
-    def __init__(self, base_layer: RotaryEmbedding) -> None:
-        super().__init__()
-        self.base_layer = base_layer
-
-    @property
-    def scaling_factors(self):
-        return self.base_layer.scaling_factors
-
-    @property
-    def rotary_dim(self):
-        return self.base_layer.rotary_dim
-
-    def create_lora_weights(
-        self,
-        max_loras: int,
-        lora_config: LoRAConfig,
-        model_config: Optional[PretrainedConfig] = None,
-    ) -> None:
-        scaling_factors = (list(lora_config.long_lora_scaling_factors)
-                           if lora_config.long_lora_scaling_factors else [])
-        base_scaling_factor = (self.base_layer.scaling_factor if isinstance(
-            self.base_layer, LinearScalingRotaryEmbedding) else 1.0)
-        scaling_factors = sorted(
-            list(set([base_scaling_factor] + scaling_factors)))
-        self.base_layer = LinearScalingRotaryEmbedding(
-            self.base_layer.head_size,
-            self.base_layer.rotary_dim,
-            self.base_layer.max_position_embeddings,
-            self.base_layer.base,
-            self.base_layer.is_neox_style,
-            scaling_factors,
-            self.base_layer.dtype,
-        )
-
-    def reset_lora(self, index: int):
-        ...
-
-    def set_lora(
-        self,
-        index: int,
-        lora_a: torch.Tensor,
-        lora_b: torch.Tensor,
-        embeddings_tensor: Optional[torch.Tensor],
-        bias: Optional[torch.Tensor] = None,
-    ):
-        ...
-
-    def forward(
-        self,
-        positions: torch.Tensor,
-        query: torch.Tensor,
-        key: torch.Tensor,
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        return self.base_layer(
-            positions,
-            query,
-            key,
-            offsets=self.punica_wrapper.long_lora_indices,
-        )
-
-    @property
-    def scaling_factor_to_offset(self) -> dict[float, int]:
-        return self.base_layer.scaling_factor_to_offset
-
-    @classmethod
-    def can_replace_layer(
-        cls,
-        source_layer: nn.Module,
-        lora_config: LoRAConfig,
-        packed_modules_list: list,
-        model_config: Optional[PretrainedConfig],
-    ) -> bool:
-        """Returns True if the layer can be replaced by this LoRA layer."""
-        return (type(source_layer) is LinearScalingRotaryEmbedding
-                or type(source_layer) is RotaryEmbedding)
-
-    def extra_repr(self) -> str:
-        return self.base_layer.extra_repr()
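
The deleted wrapper's runtime job was to rebuild the rotary embedding with every configured scaling factor and then shift each token's position by a per-adapter offset (punica_wrapper.long_lora_indices) so that each adapter indexed the cos/sin slice built for its own factor. Below is a simplified, self-contained sketch of that offset bookkeeping, not the actual vLLM kernel; all names are local to the example, and the 4096 base length and 4.0 factor are taken from the test values earlier in this commit.

import math

max_position_embeddings = 4096          # base model positions (from the test)
scaling_factors = sorted({1.0, 4.0})    # base factor plus one long-LoRA factor

# One cos/sin slice per factor, concatenated; remember where each slice starts.
scaling_factor_to_offset: dict[float, int] = {}
offset = 0
for factor in scaling_factors:
    scaling_factor_to_offset[factor] = offset
    offset += math.ceil(factor * max_position_embeddings)

def rope_cache_index(position: int, adapter_scaling_factor: float) -> int:
    """Index into the concatenated cache for a token served by one adapter."""
    return scaling_factor_to_offset[adapter_scaling_factor] + position

assert rope_cache_index(0, 1.0) == 0         # base-model requests start at 0
assert rope_cache_index(0, 4.0) == 4096      # long-LoRA slice starts after it
assert rope_cache_index(10000, 4.0) == 14096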
