Skip to content
4 changes: 3 additions & 1 deletion tests/integration/defs/perf/test_perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
"deepseek_r1_distill_qwen_32b": "DeepSeek-R1/DeepSeek-R1-Distill-Qwen-32B",
"mixtral_8x22b_v0.1": "Mixtral-8x22B-v0.1",
"mistral_7b_v0.1": "mistral-7b-v0.1",
"mistral_7b_v0.3": "Mistral-7B-Instruct-v0.3",
"deepseek_r1_fp8": "DeepSeek-R1/DeepSeek-R1",
"deepseek_r1_nvfp4": "DeepSeek-R1/DeepSeek-R1-FP4",
"deepseek_v3_lite_fp8": "DeepSeek-V3-Lite/fp8",
Expand Down Expand Up @@ -137,6 +138,7 @@
"mistral_7b_v0.1_hf": "mistralai/Mistral-7B-v0.1",
"flan_t5_base_hf": "google/flan-t5-small",
"phi_4_mini_instruct_hf": "microsoft/Phi-4-mini-instruct",
"nvila_15b": "nvidia/NVILA-15B",
}
LORA_MODEL_PATH = {
"llama_v2_13b": "llama-models-v2/chinese-llama-2-lora-13b",
Expand Down Expand Up @@ -1379,8 +1381,8 @@ def get_commands(self):
data_cmd = self.get_prepare_data_command(
engine_dir, input_len, output_len)
data_cmds.append(data_cmd)

# Construct MPI command.
mpi_cmd = []
if num_gpus > 1 and num_gpus <= 8 and not self._config.runtime == "bench":
if cpu_socket_count_gt_1():
Expand Down
17 changes: 17 additions & 0 deletions tests/integration/defs/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -1607,6 +1607,9 @@ def test_ptp_quickstart(llm_root, llm_venv):
pytest.param('Mixtral-8x7B-BF16',
'Mixtral-8x7B-Instruct-v0.1',
marks=skip_pre_blackwell),
pytest.param('Mistral-7B-Instruct-v0.3',
'Mistral-7B-Instruct-v0.3',
marks=skip_pre_blackwell),
pytest.param('Mistral-Nemo-12b-Base',
'Mistral-Nemo-Base-2407',
marks=skip_pre_blackwell),
Expand Down Expand Up @@ -2012,6 +2015,20 @@ def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path,
],
],
},
"NVILA-15B-FP16": {
"image": [
["stormy", "ocean", "waves", "clouds", "gray", "sky"],
["rock", "formation", "sunny", "sky", "clouds"],
["road", "busy", "car", "black", "blue"],
],
"video": [
["woman", "street", "night", "walking", "camera"],
[
"stunning", "earth", "space", "planet", "curvature", "dark",
"bright", "contrast", "illuminate"
],
],
},
"llava-v1.6-mistral-7b": {
"image": [
[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP8-modelopt-hf-model-hub
test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP4-modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp4]
test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1-BF16-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1]
test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-BF16-Mixtral-8x7B-Instruct-v0.1]
test_e2e.py::test_ptp_quickstart_advanced[Mistral-7B-Instruct-v0.3-Mistral-7B-Instruct-v0.3]
test_e2e.py::test_ptp_quickstart_advanced[Mistral-Nemo-12b-Base-Mistral-Nemo-Base-2407]
test_e2e.py::test_ptp_quickstart_advanced[DeepSeek-R1-Distill-Qwen-32B-DeepSeek-R1/DeepSeek-R1-Distill-Qwen-32B]

Expand Down
56 changes: 33 additions & 23 deletions tests/integration/test_lists/qa/llm_release_digits_perf.txt
Original file line number Diff line number Diff line change
@@ -1,28 +1,38 @@
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-streaming-bfloat16-input_output_len:128,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-streaming-bfloat16-input_output_len:512,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:128,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:512,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] #passed
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] #passed
perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] #passed
perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] SKIP (https://nvbugspro.nvidia.com/bug/5387445)
perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1] SKIP (https://nvbugspro.nvidia.com/bug/5387445)
perf/test_perf.py::test_perf[llama_v3.3_70b_instruct-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1] SKIP (https://nvbugspro.nvidia.com/bug/5387445)
perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] SKIP (https://nvbugspro.nvidia.com/bug/5387445)
perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1]
perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1]
perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1]
perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1]
perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1]
perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1]
perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1]
perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1]
perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-kv_cache_dtype:fp8]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-input_output_len:128,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-input_output_len:512,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:512,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:128,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,2048]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,2048]

perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-input_output_len:128,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-reqs:100-con:2]
perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-input_output_len:128,128]
perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-input_output_len:512,32]
perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-input_output_len:128,128]
perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-input_output_len:512,128]

perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-input_output_len:128,128]
perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128]
perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:512,128]
perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128]
perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:512,128]

perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct-bench-pytorch-float16-input_output_len:128,128]
perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-input_output_len:128,128]
perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:512,128]
perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-streaming-float8-input_output_len:512,32]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,32-kv_cache_dtype:fp8]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128]
perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128]
perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:512,128]

perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-input_output_len:128,128]
perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-input_output_len:128,128]
perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128-kv_cache_dtype:fp8]
perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1]
perf/test_perf.py::test_perf[nvila_15b-bench-pytorch-bfloat16-input_output_len:128,128]