From 83a72437e0e3bb568dedb92da12772c99aca3da9 Mon Sep 17 00:00:00 2001 From: Jenny Liu Date: Tue, 1 Jul 2025 08:50:10 +0000 Subject: [PATCH 01/14] update NVILA-15B-FP16 match keywords --- tests/integration/defs/test_e2e.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/integration/defs/test_e2e.py b/tests/integration/defs/test_e2e.py index 145c069fd93..48c95f636a3 100644 --- a/tests/integration/defs/test_e2e.py +++ b/tests/integration/defs/test_e2e.py @@ -2012,6 +2012,20 @@ def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path, ], ], }, + "NVILA-15B-FP16": { + "image": [ + ["stormy", "ocean", "waves", "clouds", "gray", "sky"], + ["rock", "formation", "sunny", "sky", "clouds"], + ["road", "busy", "car", "black", "blue"], + ], + "video": [ + ["woman", "street", "night", "walking", "camera"], + [ + "stunning", "earth", "space", "planet", "curvature", "dark", + "bright", "contrast", "illuminate" + ], + ], + }, "llava-v1.6-mistral-7b": { "image": [ [ From f997eb2cee71093eeb4e820e935c17c2a23bd8df Mon Sep 17 00:00:00 2001 From: Jenny Liu Date: Wed, 2 Jul 2025 01:00:33 +0000 Subject: [PATCH 02/14] update perf case - only support pytorch backend --- tests/integration/test_lists/qa/llm_release_digits_perf.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/integration/test_lists/qa/llm_release_digits_perf.txt b/tests/integration/test_lists/qa/llm_release_digits_perf.txt index a216f04c302..d4aef72bff7 100644 --- a/tests/integration/test_lists/qa/llm_release_digits_perf.txt +++ b/tests/integration/test_lists/qa/llm_release_digits_perf.txt @@ -1,7 +1,3 @@ -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-streaming-bfloat16-input_output_len:128,128] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-streaming-bfloat16-input_output_len:512,128] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:128,128] 
-perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:512,128] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,128] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,2048] From f038bf87c3d69143f85d53c7fd040561402d355a Mon Sep 17 00:00:00 2001 From: Jenny Liu Date: Wed, 2 Jul 2025 01:45:02 +0000 Subject: [PATCH 03/14] add model Mixtral-7B-Instruct-v0.3 --- tests/integration/defs/test_e2e.py | 3 +++ tests/integration/test_lists/qa/llm_release_digits_func.txt | 1 + 2 files changed, 4 insertions(+) diff --git a/tests/integration/defs/test_e2e.py b/tests/integration/defs/test_e2e.py index 48c95f636a3..d883beb6412 100644 --- a/tests/integration/defs/test_e2e.py +++ b/tests/integration/defs/test_e2e.py @@ -1607,6 +1607,9 @@ def test_ptp_quickstart(llm_root, llm_venv): pytest.param('Mixtral-8x7B-BF16', 'Mixtral-8x7B-Instruct-v0.1', marks=skip_pre_blackwell), + pytest.param('Mixtral-7B-Instruct-v0.3', + 'Mistral-7B-Instruct-v0.3', + marks=skip_pre_blackwell), pytest.param('Mistral-Nemo-12b-Base', 'Mistral-Nemo-Base-2407', marks=skip_pre_blackwell), diff --git a/tests/integration/test_lists/qa/llm_release_digits_func.txt b/tests/integration/test_lists/qa/llm_release_digits_func.txt index 00d0bac895e..6ea117597e7 100644 --- a/tests/integration/test_lists/qa/llm_release_digits_func.txt +++ b/tests/integration/test_lists/qa/llm_release_digits_func.txt @@ -12,6 +12,7 @@ test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP8-modelopt-hf-model-hub test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP4-modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp4] test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1-BF16-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1] 
test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-BF16-Mixtral-8x7B-Instruct-v0.1] +test_e2e.py::test_ptp_quickstart_advanced[Mixtral-7B-Instruct-v0.3-Mixtral-7B-Instruct-v0.3] test_e2e.py::test_ptp_quickstart_advanced[Mistral-Nemo-12b-Base-Mistral-Nemo-Base-2407] test_e2e.py::test_ptp_quickstart_advanced[DeepSeek-R1-Distill-Qwen-32B-DeepSeek-R1/DeepSeek-R1-Distill-Qwen-32B] From 5cd1740428391a409616e8acac8ddf36666269f6 Mon Sep 17 00:00:00 2001 From: Jenny Liu Date: Wed, 2 Jul 2025 06:55:01 +0000 Subject: [PATCH 04/14] add perf test cases --- tests/integration/defs/perf/test_perf.py | 3 +- .../test_lists/qa/llm_release_digits_perf.txt | 44 ++++++++++++++----- 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/tests/integration/defs/perf/test_perf.py b/tests/integration/defs/perf/test_perf.py index cdbeea45971..28c38b25f1c 100644 --- a/tests/integration/defs/perf/test_perf.py +++ b/tests/integration/defs/perf/test_perf.py @@ -82,6 +82,7 @@ "deepseek_r1_distill_qwen_32b": "DeepSeek-R1/DeepSeek-R1-Distill-Qwen-32B", "mixtral_8x22b_v0.1": "Mixtral-8x22B-v0.1", "mistral_7b_v0.1": "mistral-7b-v0.1", + "mistral_7b_v0.3": "Mistral-7B-Instruct-v0.3", "deepseek_r1_fp8": "DeepSeek-R1/DeepSeek-R1", "deepseek_r1_nvfp4": "DeepSeek-R1/DeepSeek-R1-FP4", "deepseek_v3_lite_fp8": "DeepSeek-V3-Lite/fp8", @@ -1379,7 +1380,7 @@ def get_commands(self): data_cmd = self.get_prepare_data_command( engine_dir, input_len, output_len) data_cmds.append(data_cmd) - + print(f"================= data_cmd: {data_cmd}") # Construct MPI command. 
mpi_cmd = [] if num_gpus > 1 and num_gpus <= 8 and not self._config.runtime == "bench": diff --git a/tests/integration/test_lists/qa/llm_release_digits_perf.txt b/tests/integration/test_lists/qa/llm_release_digits_perf.txt index d4aef72bff7..6936171a5cc 100644 --- a/tests/integration/test_lists/qa/llm_release_digits_perf.txt +++ b/tests/integration/test_lists/qa/llm_release_digits_perf.txt @@ -1,24 +1,44 @@ -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,128] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,2048] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-kv_cache_dtype:fp8] # passed +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-input_output_len:128,128] #passed +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-input_output_len:512,128] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:512,128] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:128,128] #passed +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128] #passed +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,128] #passed +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,2048] #passed +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-reqs:100-con:2]#passed 
+perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1]#passed +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1]#passed +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,32-kv_cache_dtype:fp8] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,2048] -perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-input_output_len:128,128] -perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-input_output_len:128,128] +perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-input_output_len:128,128]#passed +perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-input_output_len:512,32]#passed + +perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] +perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-input_output_len:128,128] #passed perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-input_output_len:512,128] +perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-input_output_len:128,128] -perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128] -perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:512,128] +perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] 
perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] +perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128] +perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-streaming-float8-input_output_len:512,32] #passed -perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct-bench-pytorch-float16-input_output_len:128,128] -perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-input_output_len:128,128] +perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] +perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] +perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128-kv_cache_dtype:fp8] + +perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] +perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] + +# FP4 cases failed +# ERROR : Arch conditional MMA instruction used without targeting appropriate compute capability. Aborting. 
+ -perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-input_output_len:128,128] -perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-input_output_len:128,128] From 063e5860282c0f219d02e3d29b1bbad441bb4be6 Mon Sep 17 00:00:00 2001 From: Jenny Liu Date: Thu, 3 Jul 2025 01:37:16 +0000 Subject: [PATCH 05/14] update func test --- tests/integration/defs/test_e2e.py | 2 +- tests/integration/test_lists/qa/llm_release_digits_func.txt | 2 +- tests/integration/test_lists/qa/llm_release_digits_perf.txt | 4 ---- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/integration/defs/test_e2e.py b/tests/integration/defs/test_e2e.py index d883beb6412..4a8265dbad5 100644 --- a/tests/integration/defs/test_e2e.py +++ b/tests/integration/defs/test_e2e.py @@ -1607,7 +1607,7 @@ def test_ptp_quickstart(llm_root, llm_venv): pytest.param('Mixtral-8x7B-BF16', 'Mixtral-8x7B-Instruct-v0.1', marks=skip_pre_blackwell), - pytest.param('Mixtral-7B-Instruct-v0.3', + pytest.param('Mistral-7B-Instruct-v0.3', 'Mistral-7B-Instruct-v0.3', marks=skip_pre_blackwell), pytest.param('Mistral-Nemo-12b-Base', diff --git a/tests/integration/test_lists/qa/llm_release_digits_func.txt b/tests/integration/test_lists/qa/llm_release_digits_func.txt index 6ea117597e7..7a0c0611bde 100644 --- a/tests/integration/test_lists/qa/llm_release_digits_func.txt +++ b/tests/integration/test_lists/qa/llm_release_digits_func.txt @@ -12,7 +12,7 @@ test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP8-modelopt-hf-model-hub test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP4-modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp4] test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1-BF16-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1] test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-BF16-Mixtral-8x7B-Instruct-v0.1] -test_e2e.py::test_ptp_quickstart_advanced[Mixtral-7B-Instruct-v0.3-Mixtral-7B-Instruct-v0.3] 
+test_e2e.py::test_ptp_quickstart_advanced[Mistral-7B-Instruct-v0.3-Mistral-7B-Instruct-v0.3] test_e2e.py::test_ptp_quickstart_advanced[Mistral-Nemo-12b-Base-Mistral-Nemo-Base-2407] test_e2e.py::test_ptp_quickstart_advanced[DeepSeek-R1-Distill-Qwen-32B-DeepSeek-R1/DeepSeek-R1-Distill-Qwen-32B] diff --git a/tests/integration/test_lists/qa/llm_release_digits_perf.txt b/tests/integration/test_lists/qa/llm_release_digits_perf.txt index 6936171a5cc..94641a3b128 100644 --- a/tests/integration/test_lists/qa/llm_release_digits_perf.txt +++ b/tests/integration/test_lists/qa/llm_release_digits_perf.txt @@ -38,7 +38,3 @@ perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4 perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] -# FP4 cases failed -# ERROR : Arch conditional MMA instruction used without targeting appropriate compute capability. Aborting. 
- - From dc95d34afc903ec9378ee12e4f82a25671ac6eed Mon Sep 17 00:00:00 2001 From: Jenny Liu Date: Thu, 3 Jul 2025 01:59:23 +0000 Subject: [PATCH 06/14] delete blanks --- tests/integration/test_lists/qa/llm_release_digits_perf.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integration/test_lists/qa/llm_release_digits_perf.txt b/tests/integration/test_lists/qa/llm_release_digits_perf.txt index 94641a3b128..1c0649db97b 100644 --- a/tests/integration/test_lists/qa/llm_release_digits_perf.txt +++ b/tests/integration/test_lists/qa/llm_release_digits_perf.txt @@ -36,5 +36,4 @@ perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4 perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128-kv_cache_dtype:fp8] perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] -perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] - +perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] \ No newline at end of file From e6b226f6d549ed5566e5452cd8737e964f386877 Mon Sep 17 00:00:00 2001 From: Jenny Liu Date: Thu, 3 Jul 2025 02:29:42 +0000 Subject: [PATCH 07/14] Fix whitespace and end-of-file issues in test list Signed-off-by: Jenny Liu --- .../test_lists/qa/llm_release_digits_perf.txt | 39 ++++++++----------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/tests/integration/test_lists/qa/llm_release_digits_perf.txt b/tests/integration/test_lists/qa/llm_release_digits_perf.txt index 1c0649db97b..cef63909bd8 100644 --- a/tests/integration/test_lists/qa/llm_release_digits_perf.txt +++ b/tests/integration/test_lists/qa/llm_release_digits_perf.txt @@ -1,39 +1,34 @@ 
-perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-kv_cache_dtype:fp8] # passed -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-input_output_len:128,128] #passed +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-kv_cache_dtype:fp8] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-input_output_len:128,128] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-input_output_len:512,128] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:512,128] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:128,128] #passed -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128] #passed -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,128] #passed -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,2048] #passed -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-reqs:100-con:2]#passed -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1]#passed -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1]#passed -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,32-kv_cache_dtype:fp8] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:512,128] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:128,128] 
+perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,128] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,2048] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-reqs:100-con:2] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,32-kv_cache_dtype:fp8] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,2048] - -perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-input_output_len:128,128]#passed -perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-input_output_len:512,32]#passed - +perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-input_output_len:128,128] +perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-input_output_len:512,32] perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] -perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-input_output_len:128,128] #passed +perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-input_output_len:128,128] 
perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-input_output_len:512,128] perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-input_output_len:128,128] - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128] -perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-streaming-float8-input_output_len:512,32] #passed - +perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-streaming-float8-input_output_len:512,32] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128-kv_cache_dtype:fp8] - perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] -perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] \ No newline at end of file 
+perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] From 30f33971d5da5fd45edc5fbf38ecdf50a375a8b0 Mon Sep 17 00:00:00 2001 From: Jenny Liu Date: Thu, 3 Jul 2025 05:49:24 +0000 Subject: [PATCH 08/14] Update performance test configuration Signed-off-by: Jenny Liu --- tests/integration/defs/perf/test_perf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/defs/perf/test_perf.py b/tests/integration/defs/perf/test_perf.py index 28c38b25f1c..7dd0f85d777 100644 --- a/tests/integration/defs/perf/test_perf.py +++ b/tests/integration/defs/perf/test_perf.py @@ -1380,7 +1380,6 @@ def get_commands(self): data_cmd = self.get_prepare_data_command( engine_dir, input_len, output_len) data_cmds.append(data_cmd) - print(f"================= data_cmd: {data_cmd}") # Construct MPI command. mpi_cmd = [] if num_gpus > 1 and num_gpus <= 8 and not self._config.runtime == "bench": From 589085e6f2429ebd536f36511480918a06ff41ce Mon Sep 17 00:00:00 2001 From: Jenny Liu Date: Thu, 10 Jul 2025 05:38:15 +0000 Subject: [PATCH 09/14] Add performance test configurations with maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1 parameters - Added 12 model variants with different precision configurations - Includes LLaMA 3.1 8B, LLaMA 3.3 Nemotron Super 49B, LLaMA 3.3 70B, Mixtral 8x7B variants - Added fp8, fp4, float16, and bfloat16 precision variants - All configurations use PyTorch backend with specified performance parameters --- .../test_lists/qa/llm_release_digits_perf.txt | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_lists/qa/llm_release_digits_perf.txt b/tests/integration/test_lists/qa/llm_release_digits_perf.txt index cef63909bd8..91245e35e96 100644 --- a/tests/integration/test_lists/qa/llm_release_digits_perf.txt +++ b/tests/integration/test_lists/qa/llm_release_digits_perf.txt @@ -1,3 +1,19 @@ +# Added configurations with 
the requested parameters (maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1) +# for each model with different precision variants +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[llama_v3.3_70b_instruct-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] 
+perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-kv_cache_dtype:fp8] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-input_output_len:128,128] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-input_output_len:512,128] @@ -8,7 +24,6 @@ perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-inp perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,2048] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-reqs:100-con:2] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,32-kv_cache_dtype:fp8] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] @@ -31,4 +46,4 @@ perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4 perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128-kv_cache_dtype:fp8] perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] -perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] 
+perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] \ No newline at end of file From aadd70e277b892c7d6a884128ec809e328664f08 Mon Sep 17 00:00:00 2001 From: Jenny Liu Date: Thu, 10 Jul 2025 05:54:23 +0000 Subject: [PATCH 10/14] change perf test order --- .../test_lists/qa/llm_release_digits_perf.txt | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/tests/integration/test_lists/qa/llm_release_digits_perf.txt b/tests/integration/test_lists/qa/llm_release_digits_perf.txt index 91245e35e96..55fa2d6d97b 100644 --- a/tests/integration/test_lists/qa/llm_release_digits_perf.txt +++ b/tests/integration/test_lists/qa/llm_release_digits_perf.txt @@ -1,33 +1,26 @@ # Added configurations with the requested parameters (maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1) # for each model with different precision variants +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-kv_cache_dtype:fp8] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-input_output_len:128,128] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-input_output_len:512,128] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:512,128] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:128,128] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,128] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,2048] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-reqs:100-con:2] 
+perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] perf/test_perf.py::test_perf[llama_v3.3_70b_instruct-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] -perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] -perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] 
perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-kv_cache_dtype:fp8] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-input_output_len:128,128] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-input_output_len:512,128] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:512,128] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:128,128] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,128] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,2048] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-reqs:100-con:2] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,32-kv_cache_dtype:fp8] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,2048] perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-input_output_len:128,128] perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-input_output_len:512,32] 
perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] @@ -35,15 +28,22 @@ perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-flo perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-input_output_len:512,128] perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-input_output_len:128,128] perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] -perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] -perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128] perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-streaming-float8-input_output_len:512,32] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] +perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] +perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] +perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,32-kv_cache_dtype:fp8] 
+perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,2048] +perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128-kv_cache_dtype:fp8] -perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] -perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] \ No newline at end of file +perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] +perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] From b6db2e09dac65eecc1e514d1b1bd52237725729d Mon Sep 17 00:00:00 2001 From: Jenny Liu Date: Thu, 10 Jul 2025 05:55:54 +0000 Subject: [PATCH 11/14] change perf test order --- tests/integration/test_lists/qa/llm_release_digits_perf.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_lists/qa/llm_release_digits_perf.txt b/tests/integration/test_lists/qa/llm_release_digits_perf.txt index 55fa2d6d97b..e14c596e595 100644 --- a/tests/integration/test_lists/qa/llm_release_digits_perf.txt +++ b/tests/integration/test_lists/qa/llm_release_digits_perf.txt @@ 
-34,13 +34,13 @@ perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct-bench-pytorch-float16-ma perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] -perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,32-kv_cache_dtype:fp8] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,2048] -perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] From 4cb725a0c04837d83b5781da98897882dcffa75b Mon Sep 17 
00:00:00 2001 From: Jenny Liu Date: Thu, 10 Jul 2025 05:59:19 +0000 Subject: [PATCH 12/14] change perf test order Signed-off-by: Jenny Liu --- tests/integration/test_lists/qa/llm_release_digits_perf.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_lists/qa/llm_release_digits_perf.txt b/tests/integration/test_lists/qa/llm_release_digits_perf.txt index e14c596e595..5b8052880e3 100644 --- a/tests/integration/test_lists/qa/llm_release_digits_perf.txt +++ b/tests/integration/test_lists/qa/llm_release_digits_perf.txt @@ -35,8 +35,8 @@ perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8 perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] -perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,32-kv_cache_dtype:fp8] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] From 5d774f5b48a6c8eea447e32c8a7680cd15a39476 Mon Sep 17 00:00:00 2001 From: Jenny Liu Date: Thu, 10 Jul 2025 07:58:14 +0000 Subject: [PATCH 13/14] delete some bigger input cases Signed-off-by: Jenny 
Liu --- .../test_lists/qa/llm_release_digits_perf.txt | 47 +++++++------------ 1 file changed, 17 insertions(+), 30 deletions(-) diff --git a/tests/integration/test_lists/qa/llm_release_digits_perf.txt b/tests/integration/test_lists/qa/llm_release_digits_perf.txt index 5b8052880e3..2ee7a0fc9ab 100644 --- a/tests/integration/test_lists/qa/llm_release_digits_perf.txt +++ b/tests/integration/test_lists/qa/llm_release_digits_perf.txt @@ -1,5 +1,20 @@ -# Added configurations with the requested parameters (maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1) -# for each model with different precision variants +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1] +perf/test_perf.py::test_perf[llama_v3.3_70b_instruct-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1] +perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1] +perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] 
+perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1] +perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1] +perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1] +perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-kv_cache_dtype:fp8] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-input_output_len:128,128] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-input_output_len:512,128] @@ -7,43 +22,15 @@ perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bflo perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:128,128] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,128] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,2048] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-reqs:100-con:2] 
-perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] -perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] -perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] -perf/test_perf.py::test_perf[llama_v3.3_70b_instruct-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] -perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] -perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] -perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] -perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] -perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] -perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-input_output_len:128,128] perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-input_output_len:512,32] -perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] 
perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-input_output_len:128,128] -perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-input_output_len:512,128] perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-input_output_len:128,128] -perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128] perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-streaming-float8-input_output_len:512,32] -perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] -perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] -perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] -perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] -perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] -perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,32-kv_cache_dtype:fp8] perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] 
-perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,2048] -perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] -perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128-kv_cache_dtype:fp8] perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] -perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] From 2f794ef4d6db9cca38cb0a2efcb48f01db345102 Mon Sep 17 00:00:00 2001 From: Jenny Liu Date: Fri, 11 Jul 2025 07:16:00 +0000 Subject: [PATCH 14/14] waive some cases by bug Signed-off-by: Jenny Liu --- tests/integration/defs/perf/test_perf.py | 2 ++ .../test_lists/qa/llm_release_digits_perf.txt | 16 +++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/tests/integration/defs/perf/test_perf.py b/tests/integration/defs/perf/test_perf.py index 7dd0f85d777..16f9d2776e1 100644 --- a/tests/integration/defs/perf/test_perf.py +++ b/tests/integration/defs/perf/test_perf.py @@ -138,6 +138,7 @@ "mistral_7b_v0.1_hf": "mistralai/Mistral-7B-v0.1", "flan_t5_base_hf": "google/flan-t5-small", "phi_4_mini_instruct_hf": "microsoft/Phi-4-mini-instruct", + "nvila_15b": "nvidia/NVILA-15B", } LORA_MODEL_PATH = { "llama_v2_13b": "llama-models-v2/chinese-llama-2-lora-13b", @@ -1381,6 +1382,7 @@ def get_commands(self): engine_dir, input_len, output_len) data_cmds.append(data_cmd) # Construct MPI command. 
+ print(f"data_cmd: {data_cmd}") mpi_cmd = [] if num_gpus > 1 and num_gpus <= 8 and not self._config.runtime == "bench": if cpu_socket_count_gt_1(): diff --git a/tests/integration/test_lists/qa/llm_release_digits_perf.txt b/tests/integration/test_lists/qa/llm_release_digits_perf.txt index 2ee7a0fc9ab..e7292fa8202 100644 --- a/tests/integration/test_lists/qa/llm_release_digits_perf.txt +++ b/tests/integration/test_lists/qa/llm_release_digits_perf.txt @@ -1,10 +1,10 @@ -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] -perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] -perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] -perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1] -perf/test_perf.py::test_perf[llama_v3.3_70b_instruct-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1] -perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] #passed +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] #passed +perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] #passed
+perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] SKIP (https://nvbugspro.nvidia.com/bug/5387445) +perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1] SKIP (https://nvbugspro.nvidia.com/bug/5387445) +perf/test_perf.py::test_perf[llama_v3.3_70b_instruct-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1] SKIP (https://nvbugspro.nvidia.com/bug/5387445) +perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] SKIP (https://nvbugspro.nvidia.com/bug/5387445) perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] @@ -34,3 +34,5 @@ perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-inp perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128-kv_cache_dtype:fp8] perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] +perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] +perf/test_perf.py::test_perf[nvila_15b-bench-pytorch-bfloat16-input_output_len:128,128]