test: Add llama 4 to ci (NVIDIA#3520)

dongfengy · web-flow · commit b71a0f76b437 · 2025-04-18T11:25:52.000+08:00
* Add llama 4 to ci

Signed-off-by: Dongfeng Yu <dongfengy@nvidia.com>

* Only test trtllm

Signed-off-by: Dongfeng Yu <dongfengy@nvidia.com>

* Disable maverick

Signed-off-by: Dongfeng Yu <dongfengy@nvidia.com>

---------

Signed-off-by: Dongfeng Yu <dongfengy@nvidia.com>
diff --git a/tests/integration/test_lists/test-db/l0_dgx_h200.yml b/tests/integration/test_lists/test-db/l0_dgx_h200.yml
@@ -15,3 +15,5 @@ l0_dgx_h200:
   tests:
   # ------------- PyTorch tests ---------------
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[ep4-attention_dp-cuda_graph-overlap_scheduler]
+  - unittest/_torch/multi_gpu_modeling/test_llama4.py::test_llama4[tp8-trtllm-scout] # 1h
+  # - unittest/_torch/multi_gpu_modeling/test_llama4.py::test_llama4[tp8-trtllm-maverick] # 3h will timeout
diff --git a/tests/unittest/_torch/multi_gpu_modeling/test_llama4.py b/tests/unittest/_torch/multi_gpu_modeling/test_llama4.py
@@ -26,7 +26,7 @@ def test_llama4(model_name, backend, tp_size):
     ]
 
     pytorch_config = PyTorchConfig(attn_backend=backend, )
-    model_dir = str(llm_models_root() / model_name)
+    model_dir = str(llm_models_root() / "llama4-models" / model_name)
 
     llm = LLM(
         model=model_dir,

Original file line number	Diff line number	Diff line change
`@@ -26,7 +26,7 @@ def test_llama4(model_name, backend, tp_size):`
`26`	`26`	`]`
`27`	`27`
`28`	`28`	`pytorch_config = PyTorchConfig(attn_backend=backend, )`
`29`		`- model_dir = str(llm_models_root() / model_name)`
	`29`	`+ model_dir = str(llm_models_root() / "llama4-models" / model_name)`
`30`	`30`
`31`	`31`	`llm = LLM(`
`32`	`32`	`model=model_dir,`