♻️ move all file access to threadpool

joerunde · joerunde · commit 76a34f3734a6 · 2024-10-11T17:25:02.000Z
Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
diff --git a/src/vllm_tgis_adapter/grpc/adapters.py b/src/vllm_tgis_adapter/grpc/adapters.py
@@ -88,35 +88,20 @@ async def validate_adapters(
         if global_thread_pool is None:
             global_thread_pool = concurrent.futures.ThreadPoolExecutor(max_workers=2)
 
-        # 🌶️🌶️🌶️ Check for caikit-style adapters first
-        if (
-            Path(local_adapter_path).exists()
-            and (Path(local_adapter_path) / "decoder.pt").exists()
-        ):
-            # Create new temporary directory and convert to peft format there
-            # NB: This requires write access to /tmp
-            # Intentionally setting delete=False, we need the new adapter
-            # files to exist for the life of the process
-            logger.info("Converting caikit-style adapter %s to peft format", adapter_id)
-            temp_dir = tempfile.TemporaryDirectory(delete=False)
-            convert_pt_to_peft(local_adapter_path, temp_dir.name)
-            local_adapter_path = temp_dir.name
-
-        adapter_config = await loop.run_in_executor(
+        # Increment the unique adapter id counter here in async land where we don't
+        # need to deal with thread-safety
+        unique_id = adapter_store.next_unique_id
+        adapter_store.next_unique_id += 1
+
+        adapter_metadata = await loop.run_in_executor(
             global_thread_pool,
-            _load_adapter_config_from_file,
+            _load_adapter_metadata,
             adapter_id,
             local_adapter_path,
+            unique_id,
         )
-        adapter_type = adapter_config.get("peft_type", None)
 
         # Add to cache
-        adapter_metadata = AdapterMetadata(
-            unique_id=adapter_store.next_unique_id,
-            adapter_type=adapter_type,
-            full_path=local_adapter_path,
-            full_config=adapter_config,
-        )
         adapter_store.adapters[adapter_id] = adapter_metadata
 
     # Build the proper vllm request object
@@ -142,8 +127,8 @@ async def validate_adapters(
     TGISValidationError.AdapterUnsupported.error(adapter_metadata.adapter_type)  # noqa: RET503
 
 
-def _load_adapter_config_from_file(adapter_id: str, adapter_path: str) -> dict:
-    """Get adapter from file.
+def _load_adapter_metadata(adapter_id: str, adapter_path: str, unique_id: int) -> dict:
+    """Get adapter metadata from files.
 
     Performs all the filesystem access required to deduce the type
     of the adapter. It's run in a separate thread pool executor so that file
@@ -154,17 +139,35 @@ def _load_adapter_config_from_file(adapter_id: str, adapter_path: str) -> dict:
             adapter_id, "directory does not exist"
         )
 
+    # 🌶️🌶️🌶️ Check for caikit-style adapters first
+    if Path(adapter_path).exists() and (Path(adapter_path) / "decoder.pt").exists():
+        # Create new temporary directory and convert to peft format there
+        # NB: This requires write access to /tmp
+        # Intentionally setting delete=False, we need the new adapter
+        # files to exist for the life of the process
+        logger.info("Converting caikit-style adapter %s to peft format", adapter_id)
+        temp_dir = tempfile.TemporaryDirectory(delete=False)
+        convert_pt_to_peft(adapter_path, temp_dir.name)
+        adapter_path = temp_dir.name
+
     adapter_config_path = Path(adapter_path) / "adapter_config.json"
     if not Path(adapter_config_path).exists():
         TGISValidationError.AdapterNotFound.error(
             adapter_id, "invalid adapter: no adapter_config.json found"
         )
 
-    # NB: blocks event loop
     with open(adapter_config_path) as adapter_config_file:
         adapter_config = json.load(adapter_config_file)
 
-    return adapter_config
+    adapter_type = adapter_config.get("peft_type", None)
+    adapter_metadata = AdapterMetadata(
+        unique_id=unique_id,
+        adapter_type=adapter_type,
+        full_path=adapter_path,
+        full_config=adapter_config,
+    )
+
+    return adapter_metadata
 
 
 def _reject_bad_adapter_id(adapter_id: str) -> None:
diff --git a/tests/fixtures/bloomz-560m-prompt-adapter/adapter_config.json b/tests/fixtures/bloomz-560m-prompt-adapter/adapter_config.json
@@ -0,0 +1,14 @@
+{
+  "base_model_name_or_path": "bigscience/bloomz-560m",
+  "inference_mode": true,
+  "num_attention_heads": 16,
+  "num_layers": 24,
+  "num_transformer_submodules": 1,
+  "num_virtual_tokens": 8,
+  "peft_type": "PROMPT_TUNING",
+  "prompt_tuning_init": "TEXT",
+  "prompt_tuning_init_text": "Classify if the tweet is a complaint or not:",
+  "task_type": "CAUSAL_LM",
+  "token_dim": 1024,
+  "tokenizer_name_or_path": "bigscience/bloomz-560m"
+}
diff --git a/tests/fixtures/bloomz-560m-prompt-adapter/adapter_model.bin b/tests/fixtures/bloomz-560m-prompt-adapter/adapter_model.bin
diff --git a/tests/fixtures/granite-3b-code-instruct-lora/README.md b/tests/fixtures/granite-3b-code-instruct-lora/README.md
@@ -0,0 +1 @@
+The adapter_model.safetensors file here is just a dummy file for tests to pass that will not actually need to load it
diff --git a/tests/fixtures/granite-3b-code-instruct-lora/adapter_config.json b/tests/fixtures/granite-3b-code-instruct-lora/adapter_config.json
@@ -0,0 +1,30 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "/granite/granite-3b-base-v2/step_75000_ckpt",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_attn",
+    "c_fc",
+    "c_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
diff --git a/tests/fixtures/granite-3b-code-instruct-lora/adapter_model.safetensors b/tests/fixtures/granite-3b-code-instruct-lora/adapter_model.safetensors
@@ -0,0 +1 @@
+fake weights
diff --git a/tests/test_adapters.py b/tests/test_adapters.py
@@ -1,6 +1,8 @@
 from pathlib import Path
 
 import pytest
+from vllm.lora.request import LoRARequest
+from vllm.prompt_adapter.request import PromptAdapterRequest
 
 from vllm_tgis_adapter.grpc.adapters import AdapterStore, validate_adapters
 from vllm_tgis_adapter.grpc.pb.generation_pb2 import (
@@ -11,7 +13,8 @@
 
 
 @pytest.mark.asyncio
-async def test_validate_adapters():
+async def test_caikit_prompt_adapter():
+    # Checks that decoder.pt style adapters from caikit_nlp are loaded correctly
     adapter_name = "bloom_sentiment_1"
     request = BatchedGenerationRequest(
         adapter_id=adapter_name,
@@ -20,10 +23,92 @@ async def test_validate_adapters():
     adapters = await validate_adapters(
         request, AdapterStore(cache_path=FIXTURES_DIR, adapters={})
     )
+    # Ensure we created a prompt adapter request
     assert "prompt_adapter_request" in adapters
     assert adapters["prompt_adapter_request"].prompt_adapter_name == adapter_name
     adapter_path = adapters["prompt_adapter_request"].prompt_adapter_local_path
     assert adapter_path is not None
+    assert isinstance(adapters["prompt_adapter_request"], PromptAdapterRequest)
 
+    # make sure the converted adapter is not in the cache directory
+    assert str(FIXTURES_DIR) not in adapter_path
+    assert "/tmp" in adapter_path
+
+    # Check for the converted artifacts
     assert Path.exists(Path(adapter_path) / "adapter_config.json")
     assert Path.exists(Path(adapter_path) / "adapter_model.safetensors")
+
+
+@pytest.mark.asyncio
+async def test_prompt_adapter():
+    adapter_name = "bloomz-560m-prompt-adapter"
+    request = BatchedGenerationRequest(
+        adapter_id=adapter_name,
+    )
+
+    adapters = await validate_adapters(
+        request, AdapterStore(cache_path=FIXTURES_DIR, adapters={})
+    )
+    # Ensure we created a prompt adapter request
+    assert "prompt_adapter_request" in adapters
+    assert adapters["prompt_adapter_request"].prompt_adapter_name == adapter_name
+    assert isinstance(adapters["prompt_adapter_request"], PromptAdapterRequest)
+
+
+@pytest.mark.asyncio
+async def test_lora_adapter():
+    adapter_name = "granite-3b-code-instruct-lora"
+    request = BatchedGenerationRequest(
+        adapter_id=adapter_name,
+    )
+
+    adapters = await validate_adapters(
+        request, AdapterStore(cache_path=FIXTURES_DIR, adapters={})
+    )
+    # Ensure we created a LoRA adapter request
+    assert "lora_request" in adapters
+    assert adapters["lora_request"].lora_name == adapter_name
+    assert isinstance(adapters["lora_request"], LoRARequest)
+
+
+@pytest.mark.asyncio
+async def test_adapters_are_cached():
+    adapter_name = "granite-3b-code-instruct-lora"
+    request = BatchedGenerationRequest(
+        adapter_id=adapter_name,
+    )
+
+    adapter_store = AdapterStore(cache_path=FIXTURES_DIR, adapters={})
+
+    adapters_1 = await validate_adapters(request, adapter_store=adapter_store)
+    adapters_2 = await validate_adapters(request, adapter_store=adapter_store)
+
+    # Metadata is only fetched and cached once
+    assert len(adapter_store.adapters) == 1
+    # Same unique ID is re-used for the second request
+    assert (
+        adapters_1["lora_request"].lora_int_id == adapters_2["lora_request"].lora_int_id
+    )
+
+
+@pytest.mark.asyncio
+async def test_store_handles_multiple_adapters():
+    adapter_store = AdapterStore(cache_path=FIXTURES_DIR, adapters={})
+
+    adapter_name = "granite-3b-code-instruct-lora"
+    request = BatchedGenerationRequest(
+        adapter_id=adapter_name,
+    )
+    adapters_1 = await validate_adapters(request, adapter_store=adapter_store)
+
+    adapter_name = "bloomz-560m-prompt-adapter"
+    request = BatchedGenerationRequest(
+        adapter_id=adapter_name,
+    )
+    adapters_2 = await validate_adapters(request, adapter_store=adapter_store)
+
+    assert len(adapter_store.adapters) == 2
+    assert (
+        adapters_1["lora_request"].lora_int_id
+        < adapters_2["prompt_adapter_request"].prompt_adapter_id
+    )

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -0,0 +1 @@` |
|  | `1` | `+The adapter_model.safetensors file here is just a dummy file for tests to pass that will not actually need to load it` |