Skip to content
This repository was archived by the owner on Sep 4, 2025. It is now read-only.

Commit 04e7c4e

Browse files
[Misc] remove peft as dependency for prompt models (vllm-project#8162)
1 parent 5faedf1 commit 04e7c4e

File tree

3 files changed

+94
-9
lines changed

3 files changed

+94
-9
lines changed

vllm/config.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1558,14 +1558,6 @@ class PromptAdapterConfig:
15581558
prompt_adapter_dtype: Optional[torch.dtype] = None
15591559

15601560
def __post_init__(self):
1561-
library_name = 'peft'
1562-
try:
1563-
__import__(library_name)
1564-
except ImportError as e:
1565-
raise ImportError(
1566-
f"'{library_name}' is not installed for prompt adapter support."
1567-
f"Please install it using 'pip install {library_name}'."
1568-
) from e
15691561

15701562
if self.max_prompt_adapters < 1:
15711563
raise ValueError(f"max_prompt_adapters "

vllm/prompt_adapter/models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from vllm.prompt_adapter.layers import (
1515
VocabParallelEmbeddingWithPromptAdapter) # yapf: disable
1616
from vllm.prompt_adapter.layers import PromptAdapterMapping
17+
from vllm.prompt_adapter.utils import load_peft_weights
1718

1819
logger = logging.getLogger(__name__)
1920

@@ -90,7 +91,6 @@ def from_local_checkpoint(
9091
config: PromptAdapterConfig,
9192
device: str = "cuda",
9293
) -> "PromptAdapterModel":
93-
from peft.utils import load_peft_weights
9494

9595
if num_virtual_tokens > config.max_prompt_adapter_token:
9696
raise ValueError(

vllm/prompt_adapter/utils.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# code borrowed from: https://github.com/huggingface/peft/blob/v0.12.0/src/peft/utils/save_and_load.py#L420
2+
3+
import os
4+
from typing import Optional
5+
6+
import torch
7+
from huggingface_hub import file_exists, hf_hub_download
8+
from huggingface_hub.utils import EntryNotFoundError
9+
from safetensors.torch import load_file as safe_load_file
10+
11+
WEIGHTS_NAME = "adapter_model.bin"
12+
SAFETENSORS_WEIGHTS_NAME = "adapter_model.safetensors"
13+
14+
15+
# Get current device name based on available devices
16+
def infer_device() -> str:
    """Return the name of the default torch device.

    Returns:
        ``"cuda"`` when ``torch.cuda.is_available()`` is true,
        otherwise ``"cpu"``.
    """
    return "cuda" if torch.cuda.is_available() else "cpu"
20+
21+
22+
def load_peft_weights(model_id: str,
                      device: Optional[str] = None,
                      **hf_hub_download_kwargs) -> dict:
    r"""
    Load PEFT adapter weights from a local directory or the HuggingFace Hub.

    The lookup order is: a local ``adapter_model.safetensors`` file, a local
    ``adapter_model.bin`` file, then the same two file names on the Hub
    (safetensors first).

    Args:
        model_id (`str`):
            The local path to the adapter weights or the name of the adapter to
            load from the HuggingFace Hub.
        device (`str`, *optional*):
            The device to load the weights onto. When `None`, the value
            returned by `infer_device()` is used.
        hf_hub_download_kwargs (`dict`):
            Additional arguments to pass to the `hf_hub_download` method when
            loading from the HuggingFace Hub. The keys `subfolder`, `token`,
            `use_auth_token`, `revision` and `repo_type` are also read here to
            locate the weights file.

    Returns:
        `dict`: The adapter weights as returned by the safetensors loader or
        by `torch.load`.

    Raises:
        ValueError: If neither weights file is found locally and the Hub
            reports that the `.bin` file does not exist either.
    """
    # Local import keeps the block self-contained; the module is stdlib.
    import posixpath

    subfolder = hf_hub_download_kwargs.get("subfolder", None)
    path = (os.path.join(model_id, subfolder)
            if subfolder is not None else model_id)

    if device is None:
        device = infer_device()

    local_safetensors = os.path.join(path, SAFETENSORS_WEIGHTS_NAME)
    local_bin = os.path.join(path, WEIGHTS_NAME)

    if os.path.exists(local_safetensors):
        filename = local_safetensors
        use_safetensors = True
    elif os.path.exists(local_bin):
        filename = local_bin
        use_safetensors = False
    else:
        # Nothing on disk: fall back to the Hub.
        token = hf_hub_download_kwargs.get("token", None)
        if token is None:
            token = hf_hub_download_kwargs.get("use_auth_token", None)

        # Paths inside a Hub repo always use "/" as separator, so build the
        # repo-relative name with posixpath rather than os.path (which would
        # produce "\\" on Windows and make the existence check miss).
        hub_filename = (posixpath.join(subfolder, SAFETENSORS_WEIGHTS_NAME)
                        if subfolder is not None else
                        SAFETENSORS_WEIGHTS_NAME)
        has_remote_safetensors_file = file_exists(
            repo_id=model_id,
            filename=hub_filename,
            revision=hf_hub_download_kwargs.get("revision", None),
            repo_type=hf_hub_download_kwargs.get("repo_type", None),
            token=token,
        )
        use_safetensors = has_remote_safetensors_file

        if has_remote_safetensors_file:
            # Priority 1: load safetensors weights
            filename = hf_hub_download(
                model_id,
                SAFETENSORS_WEIGHTS_NAME,
                **hf_hub_download_kwargs,
            )
        else:
            try:
                filename = hf_hub_download(model_id, WEIGHTS_NAME,
                                           **hf_hub_download_kwargs)
            except EntryNotFoundError as err:
                # Adjacent string literals (not backslash continuations) so
                # that source indentation does not leak into the message.
                raise ValueError(
                    f"Can't find weights for {model_id} in {model_id} or "
                    f"in the Hugging Face Hub. "
                    f"Please check that the file {WEIGHTS_NAME} or "
                    f"{SAFETENSORS_WEIGHTS_NAME} is present at {model_id}."
                ) from err

    if use_safetensors:
        adapters_weights = safe_load_file(filename, device=device)
    else:
        # NOTE(security): torch.load unpickles the file, which can execute
        # arbitrary code. Only load `.bin` adapters from trusted sources;
        # prefer the safetensors format where available.
        adapters_weights = torch.load(filename,
                                      map_location=torch.device(device))

    return adapters_weights

0 commit comments

Comments
 (0)