Skip to content

Commit 63a6dcf

Browse files
HappyAmazonian, redhairdragon, and Lokiiiiii
authored
merge lora fix & autoawq build issue (#2963)
Co-authored-by: Shen Teng <shen_teng@engineering.ucla.edu>
Co-authored-by: Loki <lokravi@amazon.com>
1 parent bfe6b3e commit 63a6dcf

File tree

2 files changed

+4
-1
lines changed

2 files changed

+4
-1
lines changed

engines/python/setup/djl_python/lmi_vllm/vllm_async_service.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,8 @@ async def add_lora(self, lora_name: str, lora_alias: str, lora_path: str):
273273
logging.info(f"Adding LoRA {lora_name} from {lora_path}")
274274
lora_id = self.lora_id_counter.inc(1)
275275
lora_request = create_lora_request(lora_name, lora_id, lora_path, None)
276+
# Register the LoRA request with the model registry so vLLM can find it
277+
self.model_registry.lora_requests[lora_name] = lora_request
276278
self.lora_requests[lora_request.lora_name] = lora_request
277279
result = await self.vllm_engine.add_lora(lora_request)
278280
logging.info(f"LoRA {lora_name} added to engine: {result}")
@@ -285,6 +287,7 @@ async def remove_lora(self, lora_name: str, lora_alias: str):
285287
lora_request = get_lora_request(lora_name, self.lora_requests)
286288
result = await self.vllm_engine.remove_lora(lora_request.lora_int_id)
287289
del self.lora_requests[lora_name]
290+
del self.model_registry.lora_requests[lora_name]
288291
return result
289292

290293
async def pin_lora(self, lora_name: str, lora_alias: str):

serving/docker/lmi-container-requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ scipy==1.16.0
2323
onnx==1.19.0
2424
sentence_transformers
2525
onnxruntime-gpu==1.20.0
26-
autoawq
26+
autoawq==0.2.9
2727
tokenizers
2828
pydantic==2.11.7
2929
optimum==1.23.2

0 commit comments

Comments (0)