LarryXFly
diff --git a/‎tests/integration/defs/perf/pytorch_model_config.py‎
Lines changed: 1 addition & 1 deletion b/‎tests/integration/defs/perf/pytorch_model_config.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎tests/integration/test_lists/qa/trt_llm_release_perf_cluster_test.yml‎
Lines changed: 4 additions & 4 deletions b/‎tests/integration/test_lists/qa/trt_llm_release_perf_cluster_test.yml‎
Lines changed: 4 additions & 4 deletions
@@ -18,7 +18,7 @@
 """
 
 
-def get_model_yaml_config(model_label: str, input_lens: list[int]) -> dict:
+def get_model_yaml_config(model_label: str) -> dict:
     """
         Return the yaml config corresponding to the model label.
         Args:
 
@@ -21,7 +21,6 @@ trt_llm_release_perf_cluster_test:
   - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-float8-maxbs:256-input_output_len:128,128-gpus:2]
   - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-float8-maxbs:256-input_output_len:512,32-gpus:2]
   - perf/test_perf.py::test_perf[llama_v2_13b-bench-float16-input_output_len:128,128-loras:8-gpus:2]
-  - perf/test_perf.py::test_perf[llama_v3.1_405b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128-gpus:2]
   - perf/test_perf.py::test_perf[mixtral_8x7b_v0.1-bench-float16-input_output_len:128,128-quant:fp8-gpus:2]
 
 # Tests for systems with 4+ GPUs
@@ -34,16 +33,17 @@ trt_llm_release_perf_cluster_test:
   - perf/test_perf.py::test_perf[qwen_14b_chat-bench-float16-input_output_len:128,128-gpus:4]
   - perf/test_perf.py::test_perf[qwen_14b_chat-bench-float16-input_output_len:512,32-gpus:4]
   - perf/test_perf.py::test_perf[starcoder_15b-bench-float16-input_output_len:512,200-gpus:4]
-  - perf/test_perf.py::test_perf[llama_v3.1_405b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128-gpus:4]
+  #- perf/test_perf.py::test_perf[llama_v3.1_405b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128-gpus:4]
 
 # Tests for systems with 8+ GPUs
 - condition:
     ranges:
       system_gpu_count:
         gte: 8
   tests:
-  - perf/test_perf.py::test_perf[llama_v3.1_405b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128-gpus:8]
-  - perf/test_perf.py::test_perf[llama_v3.1_405b_instruct_fp4-bench-pytorch-float4-input_output_len:512,32-gpus:8]
+  #- perf/test_perf.py::test_perf[llama_v3.1_405b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128-gpus:8]
+  #- perf/test_perf.py::test_perf[llama_v3.1_405b_instruct_fp4-bench-pytorch-float4-input_output_len:512,32-gpus:8]
+  - perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-bfloat16-input_output_len:500,2000-quant:fp8-con:250-gpus:8]
   - perf/test_perf.py::test_perf[mixtral_8x22b_v0.1-bench-float16-input_output_len:512,512-quant:fp8-tp:8]
   - perf/test_perf.py::test_perf[gpt_20b-bench-float16-maxbs:8-input_output_len:128,128-reqs:80-gpus:8]
   - perf/test_perf.py::test_perf[gpt_20b-bench-float16-maxbs:8-input_output_len:512,32-reqs:80-gpus:8]