
Commit 2bb9145

amitz-nv authored and dominicshanshan committed
[https://nvbugs/5510879][fix] Fix pytorch & TRT-python flows fused LoRA adapter modules weight split with TP>1 (NVIDIA#8063)
Signed-off-by: Amit Zuker <203509407+amitz-nv@users.noreply.github.com>
1 parent a389863 commit 2bb9145

File tree

1 file changed: +9 −0 lines


tests/unittest/llmapi/test_llm_multi_gpu_pytorch.py

Lines changed: 9 additions & 0 deletions
@@ -62,6 +62,15 @@ def test_llama_7b_multi_lora_tp2():
         cuda_graph_config=None)


+@pytest.mark.gpu2
+def test_phi3_lora_fused_modules_output_on_tp2_identical_to_tp1() -> None:
+    check_phi3_lora_fused_modules_output_tp2_identical_to_tp1(
+        LLM,
+        # Disable CUDA graph
+        # TODO: remove this once we have a proper fix for CUDA graph in LoRA
+        cuda_graph_config=None)
+
+
 @pytest.mark.skip(reason="https://nvbugs/5560921")
 @skip_ray
 @pytest.mark.gpu2
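For context on what the fix targets: a "fused" module (e.g. gate/up or QKV projections fused into one weight) concatenates several sub-modules along the output dimension. When sharding such a weight for tensor parallelism, each sub-module must be split separately and the per-rank pieces re-concatenated; a naive contiguous split would hand one rank all of one sub-module. The sketch below is a hypothetical NumPy illustration of that splitting rule, not TensorRT-LLM's actual implementation (function and variable names are invented):

```python
import numpy as np

def split_fused_lora_b(fused_b: np.ndarray, sub_sizes: list[int], tp_size: int):
    """Return one shard per TP rank for a fused column-parallel LoRA 'B' weight.

    Hypothetical sketch: each sub-module's rows are split across ranks
    independently, then concatenated per rank, preserving the fused layout.
    """
    shards = [[] for _ in range(tp_size)]
    offset = 0
    for size in sub_sizes:
        sub = fused_b[offset:offset + size]        # rows of this sub-module
        per_rank = np.split(sub, tp_size, axis=0)  # split sub-module across ranks
        for rank, piece in enumerate(per_rank):
            shards[rank].append(piece)
        offset += size
    # Each rank's shard keeps the sub-module order of the fused weight
    return [np.concatenate(parts, axis=0) for parts in shards]

lora_rank = 8
gate = np.ones((16, lora_rank))        # stand-in for the "gate" sub-module
up = 2 * np.ones((16, lora_rank))      # stand-in for the "up" sub-module
fused = np.concatenate([gate, up], axis=0)
shards = split_fused_lora_b(fused, sub_sizes=[16, 16], tp_size=2)
# Each of the 2 shards has shape (16, 8): half of gate followed by half of up
```

With a correct split like this, a TP=2 run reproduces the TP=1 output, which is exactly what the new `test_phi3_lora_fused_modules_output_on_tp2_identical_to_tp1` test asserts end to end.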
