Commit 516f857
deps: bump vllm>=0.10.0

- bump minimum vLLM version to v0.10.0
- drop PromptAdapterRequest and everything related to it
- gha: tests: bump vllm tag to v0.10

Fixes #268
1 parent: 76b0c3a

File tree

11 files changed: +8 -304 lines
.github/workflows/tests.yaml

Lines changed: 4 additions & 2 deletions
@@ -14,6 +14,8 @@ env:
   # facilitate testing by building vLLM for CPU when needed
   VLLM_CPU_DISABLE_AVX512: "true"
   VLLM_TARGET_DEVICE: "cpu"
+  VLLM_CPU_ONLY: "1"
+  CMAKE_ARGS: "-DVLLM_CPU_ONLY=ON"
   # prefer index for torch cpu version
   UV_EXTRA_INDEX_URL: "https://download.pytorch.org/whl/cpu"
   # have uv match pip's behaviour for extra index operations
@@ -26,7 +28,7 @@ concurrency:
 
 jobs:
   tests:
-    timeout-minutes: 20
+    timeout-minutes: 30
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
@@ -35,7 +37,7 @@ jobs:
         pyv: ["3.12"]
         vllm_version:
           # - "" # skip the pypi version as it will not work on CPU
-          - "git+https://github.com/vllm-project/vllm@v0.7.2"
+          - "git+https://github.com/vllm-project/vllm@v0.10.0"
           - "git+https://github.com/vllm-project/vllm@main"
 
     steps:
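The two new env vars force a CPU-only source build of vLLM in CI. A quick sanity check that could run in a test session to confirm the build resolved to the CPU backend; a sketch assuming vLLM's current_platform helper, which exists in recent releases:

from vllm.platforms import current_platform

# The matrix builds vLLM from source with VLLM_TARGET_DEVICE=cpu, so the
# resolved platform should report CPU rather than CUDA or ROCm.
assert current_platform.is_cpu()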

pyproject.toml

Lines changed: 1 addition & 2 deletions
@@ -26,7 +26,7 @@ classifiers = [
 requires-python = ">=3.9"
 dynamic = ["version"]
 dependencies = [
-    "vllm>=0.7.2",
+    "vllm>=0.10.0",
     "prometheus_client==0.21.1",
     "grpcio==1.70.0",
     "grpcio-health-checking==1.70.0",
@@ -44,7 +44,6 @@ Source = "https://github.com/opendatahub-io/vllm-tgis-adapter"
 grpc_healthcheck = "vllm_tgis_adapter.healthcheck:cli"
 model-util = "vllm_tgis_adapter.tgis_utils.scripts:cli"
 text-generation-server = "vllm_tgis_adapter.tgis_utils.scripts:cli"
-convert_pt_to_prompt = "vllm_tgis_adapter.tgis_utils.convert_pt_to_prompt:cli"
 
 [project.optional-dependencies]
 tests = [
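Since the dependency floor moves from 0.7.2 to 0.10.0, a defensive runtime guard can catch stale environments early. A minimal sketch using only importlib.metadata and packaging (the latter is pulled in transitively by vLLM itself):

from importlib.metadata import version

from packaging.version import Version

# Fail fast if the environment still carries a pre-0.10 vLLM install.
if Version(version("vllm")) < Version("0.10.0"):
    raise RuntimeError(f"vllm>=0.10.0 required, found {version('vllm')}")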

src/vllm_tgis_adapter/grpc/adapters.py

Lines changed: 1 addition & 27 deletions
@@ -11,15 +11,12 @@
 import dataclasses
 import json
 import re
-import tempfile
 from pathlib import Path
 from typing import TYPE_CHECKING
 
 from vllm.entrypoints.openai.protocol import ErrorResponse
-from vllm.prompt_adapter.request import PromptAdapterRequest
 
 from vllm_tgis_adapter.logging import init_logger
-from vllm_tgis_adapter.tgis_utils.convert_pt_to_prompt import convert_pt_to_peft
 
 from .validation import TGISValidationError
 
@@ -69,7 +66,7 @@ async def validate_adapters(
     | BatchedTokenizeRequest,
     adapter_store: AdapterStore | None,
     vllm_model_handler: OpenAIServingModels,
-) -> dict[str, LoRARequest | PromptAdapterRequest]:
+) -> dict[str, LoRARequest]:
     """Validate the adapters.
 
     Takes the adapter name from the request and constructs a valid
@@ -136,18 +133,6 @@ async def validate_adapters(
         # Use our cache for everything else
         adapter_store.adapters[adapter_id] = adapter_metadata
 
-    # Build the proper vllm request object
-    if adapter_metadata.adapter_type == "PROMPT_TUNING":
-        prompt_adapter_request = PromptAdapterRequest(
-            prompt_adapter_id=adapter_metadata.unique_id,
-            prompt_adapter_name=adapter_id,
-            prompt_adapter_local_path=adapter_metadata.full_path,
-            prompt_adapter_num_virtual_tokens=adapter_metadata.full_config.get(
-                "num_virtual_tokens", 0
-            ),
-        )
-        return {"prompt_adapter_request": prompt_adapter_request}
-
     # All other types unsupported
     TGISValidationError.AdapterUnsupported.error(adapter_metadata.adapter_type)  # noqa: RET503
 
@@ -188,17 +173,6 @@ def _load_adapter_metadata(adapter_id: str, adapter_path: str, unique_id: int) -
             adapter_id, "directory does not exist"
         )
 
-    # 🌶️🌶️🌶️ Check for caikit-style adapters first
-    if (Path(adapter_path) / "decoder.pt").exists():
-        # Create new temporary directory and convert to peft format there
-        # NB: This requires write access to /tmp
-        # Intentionally setting delete=False, we need the new adapter
-        # files to exist for the life of the process
-        logger.info("Converting caikit-style adapter %s to peft format", adapter_id)
-        temp_dir = tempfile.TemporaryDirectory(delete=False)
-        convert_pt_to_peft(adapter_path, temp_dir.name)
-        adapter_path = temp_dir.name
-
     adapter_config_path = Path(adapter_path) / "adapter_config.json"
     if not Path(adapter_config_path).exists():
         TGISValidationError.AdapterNotFound.error(
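With the PROMPT_TUNING branch removed, the mapping returned by validate_adapters can only ever hold a "lora_request" entry. A minimal sketch of what a caller looks like under the narrowed contract (the engine call and keyword plumbing here are illustrative, not taken from this repo):

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from vllm import SamplingParams
    from vllm.engine.async_llm_engine import AsyncLLMEngine
    from vllm.lora.request import LoRARequest


async def generate(
    engine: "AsyncLLMEngine",
    prompt: str,
    sampling_params: "SamplingParams",
    request_id: str,
    adapters: "dict[str, LoRARequest]",
):
    # The only key validate_adapters can still populate is "lora_request";
    # "prompt_adapter_request" is gone along with PromptAdapterRequest.
    async for output in engine.generate(
        prompt,
        sampling_params,
        request_id,
        lora_request=adapters.get("lora_request"),
    ):
        yield output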

src/vllm_tgis_adapter/grpc/grpc_server.py

Lines changed: 1 addition & 2 deletions
@@ -64,7 +64,6 @@
 from vllm.sequence import Logprob
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 
-from .adapters import PromptAdapterRequest
 from .pb.generation_pb2 import (
     BatchedGenerationRequest,
     BatchedTokenizeRequest,
@@ -622,7 +621,7 @@ async def _validate_adapters(
     | BatchedTokenizeRequest,
     context: ServicerContext,
     vllm_model_handler: OpenAIServingModels,
-) -> dict[str, LoRARequest | PromptAdapterRequest]:
+) -> dict[str, LoRARequest]:
     try:
         adapters = await validate_adapters(
             request=request,

src/vllm_tgis_adapter/tgis_utils/convert_pt_to_prompt.py

Lines changed: 0 additions & 82 deletions
This file was deleted.

src/vllm_tgis_adapter/tgis_utils/logs.py

Lines changed: 0 additions & 3 deletions
@@ -63,14 +63,11 @@ async def generate_with_logging(*args, **kwargs) -> AsyncGenerator[RequestOutput
     sampling_params = _get_arg("sampling_params", 1, *args, **kwargs)
     request_id = _get_arg("request_id", 2, *args, **kwargs)
     lora_request = _get_arg("lora_request", 3, *args, **kwargs)
-    prompt_adapter_request = _get_arg("prompt_adapter_request", 5, *args, **kwargs)
 
     correlation_id = get_correlation_id(request_id=request_id)
     adapter_id = None
     if lora_request:
         adapter_id = lora_request.adapter_id
-    elif prompt_adapter_request:
-        adapter_id = prompt_adapter_request.prompt_adapter_id
 
     # Log the request
     with suppress(BaseException):
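The surrounding _get_arg calls fetch a value whether it arrived positionally or as a keyword; the prompt_adapter_request slot (position 5) simply disappears. A rough stand-in for such a helper (the real implementation in tgis_utils/logs.py may differ):

def _get_arg(name, pos, *args, **kwargs):
    """Return argument `name`, whether passed by keyword or at position `pos`."""
    if name in kwargs:
        return kwargs[name]
    return args[pos] if pos < len(args) else None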

tests/conftest.py

Lines changed: 1 addition & 6 deletions
@@ -3,7 +3,6 @@
 import asyncio
 import sys
 import threading
-from pathlib import Path
 from typing import TYPE_CHECKING, Annotated, TypeVar
 
 import pytest
@@ -33,11 +32,6 @@
 ArgFixture = Annotated[T, pytest.fixture]
 
 
-@pytest.fixture
-def prompt_tune_path():
-    return Path(__file__).parent / "fixtures" / "bloom_sentiment_1"
-
-
 @pytest.fixture
 def lora_available() -> bool:
     # lora does not work on cpu
@@ -111,6 +105,7 @@ def args(  # noqa: PLR0913
             f"--grpc-port={grpc_server_port}",
             f"--port={http_server_port}",
             "--dtype=float32",
+            "--device=cpu",
             *extra_args,
         ],
     )

tests/fixtures/bloom_sentiment_1/adapter_config.json

Lines changed: 0 additions & 4 deletions
This file was deleted.

(deleted binary fixture, -32.7 KB; binary file not shown)
