diff --git a/engines/python/setup/djl_python/lmi_vllm/vllm_async_service.py b/engines/python/setup/djl_python/lmi_vllm/vllm_async_service.py index f3d216561..a1c2aca5e 100644 --- a/engines/python/setup/djl_python/lmi_vllm/vllm_async_service.py +++ b/engines/python/setup/djl_python/lmi_vllm/vllm_async_service.py @@ -270,6 +270,8 @@ async def add_lora(self, lora_name: str, lora_alias: str, lora_path: str): logging.info(f"Adding LoRA {lora_name} from {lora_path}") lora_id = self.lora_id_counter.inc(1) lora_request = create_lora_request(lora_name, lora_id, lora_path, None) + # Register the LoRA request with the model registry so vLLM can find it + self.model_registry.lora_requests[lora_name] = lora_request self.lora_requests[lora_request.lora_name] = lora_request result = await self.vllm_engine.add_lora(lora_request) logging.info(f"LoRA {lora_name} added to engine: {result}") @@ -282,6 +284,7 @@ async def remove_lora(self, lora_name: str, lora_alias: str): lora_request = get_lora_request(lora_name, self.lora_requests) result = await self.vllm_engine.remove_lora(lora_request.lora_int_id) del self.lora_requests[lora_name] + del self.model_registry.lora_requests[lora_name] return result async def pin_lora(self, lora_name: str, lora_alias: str):