Commit 260ae9c

✨ invoke conversion at load time
Signed-off-by: Joe Runde <[email protected]>
1 parent: a3620be · commit: 260ae9c

2 files changed: +24, -7 lines

src/vllm_tgis_adapter/grpc/adapters.py

Lines changed: 20 additions & 0 deletions
@@ -11,12 +11,16 @@
 import dataclasses
 import json
 import re
+import tempfile
 from pathlib import Path
 from typing import TYPE_CHECKING

 from vllm.lora.request import LoRARequest
 from vllm.prompt_adapter.request import PromptAdapterRequest

+from vllm_tgis_adapter.logging import init_logger
+from vllm_tgis_adapter.tgis_utils.convert_pt_to_prompt import convert_pt_to_peft
+
 from .validation import TGISValidationError

 if TYPE_CHECKING:
@@ -30,6 +34,8 @@

 VALID_ADAPTER_ID_PATTERN = re.compile("[/\\w\\-]+")

+logger = init_logger(__name__)
+

 @dataclasses.dataclass
 class AdapterMetadata:
@@ -82,6 +88,20 @@ async def validate_adapters(
     if global_thread_pool is None:
         global_thread_pool = concurrent.futures.ThreadPoolExecutor(max_workers=2)

+    # 🌶️🌶️🌶️ Check for caikit-style adapters first
+    if (
+        Path(local_adapter_path).exists()
+        and (Path(local_adapter_path) / "decoder.pt").exists()
+    ):
+        # Create new temporary directory and convert to peft format there
+        # NB: This requires write access to /tmp
+        # Intentionally setting delete=False, we need the new adapter
+        # files to exist for the life of the process
+        logger.info("Converting caikit-style adapter %s to peft format", adapter_id)
+        temp_dir = tempfile.TemporaryDirectory(delete=False)
+        convert_pt_to_peft(local_adapter_path, temp_dir.name)
+        local_adapter_path = temp_dir.name
+
     adapter_config = await loop.run_in_executor(
         global_thread_pool,
         _load_adapter_config_from_file,
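
For readers skimming the diff, the new block in validate_adapters boils down to the following standalone sketch. The adapter path and id below are illustrative, not taken from the commit; convert_pt_to_peft is the converter touched in the second file. One assumption worth flagging: the delete= keyword on tempfile.TemporaryDirectory only exists on Python 3.12+.

# Minimal sketch of the load-time conversion, with hypothetical paths.
import tempfile
from pathlib import Path

from vllm_tgis_adapter.tgis_utils.convert_pt_to_prompt import convert_pt_to_peft

local_adapter_path = "/adapters/my-caikit-prompt"  # hypothetical caikit-style adapter dir
adapter_id = "my-caikit-prompt"                    # hypothetical adapter id

if (Path(local_adapter_path) / "decoder.pt").exists():
    # A decoder.pt file marks a caikit-style prompt adapter: convert it into a
    # peft-format copy in a fresh temp dir and point the loader at that copy.
    # delete=False keeps the converted files for the life of the process and
    # requires write access to /tmp (and Python >= 3.12 for the keyword).
    temp_dir = tempfile.TemporaryDirectory(delete=False)
    convert_pt_to_peft(local_adapter_path, temp_dir.name)
    local_adapter_path = temp_dir.name  # downstream config loading sees only peft format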

src/vllm_tgis_adapter/tgis_utils/convert_pt_to_prompt.py

Lines changed: 4 additions & 7 deletions
@@ -2,7 +2,6 @@
 # users the ability to be able to run it independently without
 # having to install vllm as a dependency
 import argparse
-import sys
 from pathlib import Path

 import torch
@@ -30,17 +29,15 @@ def convert_pt_to_peft(input_dir: str, output_dir: str) -> None:
     # read decoder.pt file
     decoder_pt_path = Path(input_dir) / "decoder.pt"
     if not decoder_pt_path.exists():
-        print(f"No decoder.pt model found in path {decoder_pt_path}")  # noqa: T201
-        sys.exit()
+        raise ValueError(f"No decoder.pt model found in path {decoder_pt_path}")

     # error if encoder.pt file exists
     encoder_pt_path = Path(input_dir) / "encoder.pt"
     if encoder_pt_path.exists():
-        print(  # noqa: T201
+        raise ValueError(
             f"encoder.pt model found in path {encoder_pt_path}, \
             encoder-decoder models are not yet supported, sorry!"
         )
-        sys.exit()

     # check output dir
     if output_dir is None:
@@ -58,8 +55,7 @@ def convert_pt_to_peft(input_dir: str, output_dir: str) -> None:

     # error if output_dir is file
     if output_path.is_file():
-        print(f"File found instead of dir {output_path}, exiting...")  # noqa: T201
-        sys.exit()
+        raise ValueError(f"File found instead of dir {output_path}")

     # load tensors from decoder.pt and save to .safetensors
     decoder_tensors = torch.load(decoder_pt_path, weights_only=True)
@@ -73,6 +69,7 @@ def convert_pt_to_peft(input_dir: str, output_dir: str) -> None:
     adapter_config = {
         "num_virtual_tokens": decoder_tensors.shape[0],
         "peft_type": "PROMPT_TUNING",
+        "base_model_name_or_path": "this-is-a/temporary-conversion",
     }

     with open(output_path / "adapter_config.json", "w") as config_file:
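
With the print/sys.exit() paths replaced by ValueError, a failed conversion no longer takes down the whole process and can be handled by the caller. A minimal usage sketch follows, with an illustrative input path; per the comments in this file, the output directory ends up holding adapter_config.json plus a .safetensors copy of the decoder weights.

# Hedged usage sketch for the converter; the input path is hypothetical.
import tempfile

from vllm_tgis_adapter.tgis_utils.convert_pt_to_prompt import convert_pt_to_peft

output_dir = tempfile.mkdtemp(prefix="prompt-conversion-")  # converted copy lives here

try:
    # Reads <input_dir>/decoder.pt and writes a peft-style prompt adapter
    # (adapter_config.json plus a .safetensors weights file) into output_dir.
    convert_pt_to_peft("/adapters/my-caikit-prompt", output_dir)
except ValueError as err:
    # Raised for a missing decoder.pt, an unsupported encoder.pt, or an output
    # path that is a regular file, cases that previously printed and exited.
    print(f"conversion failed: {err}")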
