Commit 516f857
deps: bump vllm>=0.10.0

- bump minimum vLLM version to v0.10.0
- drop PromptAdapterRequest and everything related to it
- gha: tests: bump vllm tag to v0.10

Fixes #268
1 parent: 76b0c3a

File tree

11 files changed: +8 -304 lines
.github/workflows/tests.yaml

Lines changed: 4 additions & 2 deletions
@@ -14,6 +14,8 @@ env:
   # facilitate testing by building vLLM for CPU when needed
   VLLM_CPU_DISABLE_AVX512: "true"
   VLLM_TARGET_DEVICE: "cpu"
+  VLLM_CPU_ONLY: "1"
+  CMAKE_ARGS: "-DVLLM_CPU_ONLY=ON"
   # prefer index for torch cpu version
   UV_EXTRA_INDEX_URL: "https://download.pytorch.org/whl/cpu"
   # have uv match pip's behaviour for extra index operations
@@ -26,7 +28,7 @@ concurrency:
 
 jobs:
   tests:
-    timeout-minutes: 20
+    timeout-minutes: 30
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
@@ -35,7 +37,7 @@ jobs:
         pyv: ["3.12"]
         vllm_version:
           # - "" # skip the pypi version as it will not work on CPU
-          - "git+https://github.com/vllm-project/vllm@v0.7.2"
+          - "git+https://github.com/vllm-project/vllm@v0.10.0"
           - "git+https://github.com/vllm-project/vllm@main"
 
     steps:
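The two new env vars force a CPU-only source build of vLLM in CI. A quick sanity check that could run in a test session to confirm the build resolved to the CPU backend; a sketch assuming vLLM's current_platform helper, which exists in recent releases:

from vllm.platforms import current_platform

# The matrix builds vLLM from source with VLLM_TARGET_DEVICE=cpu, so the
# resolved platform should report CPU rather than CUDA or ROCm.
assert current_platform.is_cpu()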

pyproject.toml

Lines changed: 1 addition & 2 deletions
@@ -26,7 +26,7 @@ classifiers = [
 requires-python = ">=3.9"
 dynamic = ["version"]
 dependencies = [
-    "vllm>=0.7.2",
+    "vllm>=0.10.0",
     "prometheus_client==0.21.1",
     "grpcio==1.70.0",
     "grpcio-health-checking==1.70.0",
@@ -44,7 +44,6 @@ Source = "https://github.com/opendatahub-io/vllm-tgis-adapter"
 grpc_healthcheck = "vllm_tgis_adapter.healthcheck:cli"
 model-util = "vllm_tgis_adapter.tgis_utils.scripts:cli"
 text-generation-server = "vllm_tgis_adapter.tgis_utils.scripts:cli"
-convert_pt_to_prompt = "vllm_tgis_adapter.tgis_utils.convert_pt_to_prompt:cli"
 
 [project.optional-dependencies]
 tests = [
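Since the dependency floor moves from 0.7.2 to 0.10.0, a defensive runtime guard can catch stale environments early. A minimal sketch using only importlib.metadata and packaging (the latter is pulled in transitively by vLLM itself):

from importlib.metadata import version

from packaging.version import Version

# Fail fast if the environment still carries a pre-0.10 vLLM install.
if Version(version("vllm")) < Version("0.10.0"):
    raise RuntimeError(f"vllm>=0.10.0 required, found {version('vllm')}")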

src/vllm_tgis_adapter/grpc/adapters.py

Lines changed: 1 addition & 27 deletions
@@ -11,15 +11,12 @@
 import dataclasses
 import json
 import re
-import tempfile
 from pathlib import Path
 from typing import TYPE_CHECKING
 
 from vllm.entrypoints.openai.protocol import ErrorResponse
-from vllm.prompt_adapter.request import PromptAdapterRequest
 
 from vllm_tgis_adapter.logging import init_logger
-from vllm_tgis_adapter.tgis_utils.convert_pt_to_prompt import convert_pt_to_peft
 
 from .validation import TGISValidationError
 
@@ -69,7 +66,7 @@ async def validate_adapters(
     | BatchedTokenizeRequest,
     adapter_store: AdapterStore | None,
     vllm_model_handler: OpenAIServingModels,
-) -> dict[str, LoRARequest | PromptAdapterRequest]:
+) -> dict[str, LoRARequest]:
     """Validate the adapters.
 
     Takes the adapter name from the request and constructs a valid
@@ -136,18 +133,6 @@ async def validate_adapters(
         # Use our cache for everything else
         adapter_store.adapters[adapter_id] = adapter_metadata
 
-    # Build the proper vllm request object
-    if adapter_metadata.adapter_type == "PROMPT_TUNING":
-        prompt_adapter_request = PromptAdapterRequest(
-            prompt_adapter_id=adapter_metadata.unique_id,
-            prompt_adapter_name=adapter_id,
-            prompt_adapter_local_path=adapter_metadata.full_path,
-            prompt_adapter_num_virtual_tokens=adapter_metadata.full_config.get(
-                "num_virtual_tokens", 0
-            ),
-        )
-        return {"prompt_adapter_request": prompt_adapter_request}
-
     # All other types unsupported
     TGISValidationError.AdapterUnsupported.error(adapter_metadata.adapter_type)  # noqa: RET503
 
@@ -188,17 +173,6 @@ def _load_adapter_metadata(adapter_id: str, adapter_path: str, unique_id: int) -
             adapter_id, "directory does not exist"
         )
 
-    # 🌶️🌶️🌶️ Check for caikit-style adapters first
-    if (Path(adapter_path) / "decoder.pt").exists():
-        # Create new temporary directory and convert to peft format there
-        # NB: This requires write access to /tmp
-        # Intentionally setting delete=False, we need the new adapter
-        # files to exist for the life of the process
-        logger.info("Converting caikit-style adapter %s to peft format", adapter_id)
-        temp_dir = tempfile.TemporaryDirectory(delete=False)
-        convert_pt_to_peft(adapter_path, temp_dir.name)
-        adapter_path = temp_dir.name
-
     adapter_config_path = Path(adapter_path) / "adapter_config.json"
     if not Path(adapter_config_path).exists():
         TGISValidationError.AdapterNotFound.error(
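With the PROMPT_TUNING branch removed, the mapping returned by validate_adapters can only ever hold a "lora_request" entry. A minimal sketch of what a caller looks like under the narrowed contract (the engine call and keyword plumbing here are illustrative, not taken from this repo):

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from vllm import SamplingParams
    from vllm.engine.async_llm_engine import AsyncLLMEngine
    from vllm.lora.request import LoRARequest


async def generate(
    engine: "AsyncLLMEngine",
    prompt: str,
    sampling_params: "SamplingParams",
    request_id: str,
    adapters: "dict[str, LoRARequest]",
):
    # The only key validate_adapters can still populate is "lora_request";
    # "prompt_adapter_request" is gone along with PromptAdapterRequest.
    async for output in engine.generate(
        prompt,
        sampling_params,
        request_id,
        lora_request=adapters.get("lora_request"),
    ):
        yield output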

src/vllm_tgis_adapter/grpc/grpc_server.py

Lines changed: 1 addition & 2 deletions
@@ -64,7 +64,6 @@
 from vllm.sequence import Logprob
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 
-from .adapters import PromptAdapterRequest
 from .pb.generation_pb2 import (
     BatchedGenerationRequest,
     BatchedTokenizeRequest,
@@ -622,7 +621,7 @@ async def _validate_adapters(
     | BatchedTokenizeRequest,
     context: ServicerContext,
     vllm_model_handler: OpenAIServingModels,
-) -> dict[str, LoRARequest | PromptAdapterRequest]:
+) -> dict[str, LoRARequest]:
     try:
         adapters = await validate_adapters(
             request=request,

src/vllm_tgis_adapter/tgis_utils/convert_pt_to_prompt.py

Lines changed: 0 additions & 82 deletions
This file was deleted.

src/vllm_tgis_adapter/tgis_utils/logs.py

Lines changed: 0 additions & 3 deletions
@@ -63,14 +63,11 @@ async def generate_with_logging(*args, **kwargs) -> AsyncGenerator[RequestOutput
     sampling_params = _get_arg("sampling_params", 1, *args, **kwargs)
     request_id = _get_arg("request_id", 2, *args, **kwargs)
     lora_request = _get_arg("lora_request", 3, *args, **kwargs)
-    prompt_adapter_request = _get_arg("prompt_adapter_request", 5, *args, **kwargs)
 
     correlation_id = get_correlation_id(request_id=request_id)
     adapter_id = None
     if lora_request:
         adapter_id = lora_request.adapter_id
-    elif prompt_adapter_request:
-        adapter_id = prompt_adapter_request.prompt_adapter_id
 
     # Log the request
     with suppress(BaseException):
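The surrounding _get_arg calls fetch a value whether it arrived positionally or as a keyword; the prompt_adapter_request slot (position 5) simply disappears. A rough stand-in for such a helper (the real implementation in tgis_utils/logs.py may differ):

def _get_arg(name, pos, *args, **kwargs):
    """Return argument `name`, whether passed by keyword or at position `pos`."""
    if name in kwargs:
        return kwargs[name]
    return args[pos] if pos < len(args) else None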

tests/conftest.py

Lines changed: 1 addition & 6 deletions
@@ -3,7 +3,6 @@
 import asyncio
 import sys
 import threading
-from pathlib import Path
 from typing import TYPE_CHECKING, Annotated, TypeVar
 
 import pytest
@@ -33,11 +32,6 @@
 ArgFixture = Annotated[T, pytest.fixture]
 
 
-@pytest.fixture
-def prompt_tune_path():
-    return Path(__file__).parent / "fixtures" / "bloom_sentiment_1"
-
-
 @pytest.fixture
 def lora_available() -> bool:
     # lora does not work on cpu
@@ -111,6 +105,7 @@ def args(  # noqa: PLR0913
             f"--grpc-port={grpc_server_port}",
             f"--port={http_server_port}",
             "--dtype=float32",
+            "--device=cpu",
             *extra_args,
         ],
     )

tests/fixtures/bloom_sentiment_1/adapter_config.json

Lines changed: 0 additions & 4 deletions
This file was deleted.

(deleted binary fixture, -32.7 KB; binary file not shown)
