Skip to content

Commit 3b22757

Browse files
committed
Fix for model load error in CI
Signed-off-by: Vivek <[email protected]>
1 parent 81e6e72 commit 3b22757

File tree

2 files changed

+30
-11
lines changed

2 files changed

+30
-11
lines changed

tests/unit_tests/lora/test_llama_multilora.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,18 @@
22

33
from vllm import EngineArgs, LLMEngine, RequestOutput, SamplingParams
44
from vllm.lora.request import LoRARequest
5+
import os
6+
7+
# Need to create a symlink to avoid the long-path error thrown by the
# HF Hub validation check. Downloading the model directly from the Hub
# would also work, but would require adding an HF token to repo secrets.
src = "/mnt/weka/data/pytorch/llama2/Llama-2-7b-hf"
dst = "test_model"
# Clear any stale entry at `dst` before linking. `lexists` (unlike the
# previous `islink` check) is also true for regular files and broken
# symlinks, so a dirty checkout no longer makes os.symlink raise
# FileExistsError.
if os.path.lexists(dst):
    os.remove(dst)
os.symlink(src, dst)
MODEL_PATH = dst
517

618

719
def create_test_prompts(
@@ -99,14 +111,13 @@ def process_requests(engine: LLMEngine,
99111

100112
def _test_llama_multilora(sql_lora_files, tp_size):
101113
"""Main function that sets up and runs the prompt processing."""
102-
engine_args = EngineArgs(
103-
model="/mnt/weka/data/pytorch/llama2/Llama-2-7b-hf",
104-
enable_lora=True,
105-
max_loras=2,
106-
max_lora_rank=8,
107-
max_num_seqs=256,
108-
dtype='bfloat16',
109-
tensor_parallel_size=tp_size)
114+
engine_args = EngineArgs(model=MODEL_PATH,
115+
enable_lora=True,
116+
max_loras=2,
117+
max_lora_rank=8,
118+
max_num_seqs=256,
119+
dtype='bfloat16',
120+
tensor_parallel_size=tp_size)
110121
engine = LLMEngine.from_engine_args(engine_args)
111122
test_prompts = create_test_prompts(sql_lora_files)
112123
results = process_requests(engine, test_prompts)

tests/unit_tests/lora/test_llama_tp.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,19 @@
44

55
import vllm
66
from vllm.lora.request import LoRARequest
7-
7+
import os
88
#from ..utils import VLLM_PATH, create_new_process_for_each_test, multi_gpu_test
99

10-
MODEL_PATH = "/mnt/weka/data/pytorch/llama2/Llama-2-7b-hf"
11-
#MODEL_PATH = "meta-llama/Llama-2-7b-hf"
10+
# Need to create a symlink to avoid the long-path error thrown by the
# HF Hub validation check. Downloading the model directly from the Hub
# would also work, but would require adding an HF token to repo secrets.
src = "/mnt/weka/data/pytorch/llama2/Llama-2-7b-hf"
dst = "test_model"
# Clear any stale entry at `dst` before linking. `lexists` (unlike the
# previous `islink` check) is also true for regular files and broken
# symlinks, so a dirty checkout no longer makes os.symlink raise
# FileExistsError.
if os.path.lexists(dst):
    os.remove(dst)
os.symlink(src, dst)
MODEL_PATH = dst
1220

1321
EXPECTED_NO_LORA_OUTPUT = [
1422
"\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_75 (icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user] [assistant", # noqa: E501

0 commit comments

Comments
 (0)