From 4b344a7c6d22ddc15f04031d2e64cc5b19d8f482 Mon Sep 17 00:00:00 2001 From: Jenny Liu Date: Wed, 7 Jan 2026 09:29:06 +0000 Subject: [PATCH 1/2] Add DGX-Spark performance QA test cases for single node Signed-off-by: Jenny Liu --- tests/integration/defs/perf/test_perf.py | 20 +++++++++ .../test_lists/qa/llm_digits_perf.txt | 28 ------------ .../test_lists/qa/llm_digits_perf.yml | 44 +++++++++++++++++++ 3 files changed, 64 insertions(+), 28 deletions(-) delete mode 100644 tests/integration/test_lists/qa/llm_digits_perf.txt create mode 100644 tests/integration/test_lists/qa/llm_digits_perf.yml diff --git a/tests/integration/defs/perf/test_perf.py b/tests/integration/defs/perf/test_perf.py index df4c72a8390..0630903daea 100644 --- a/tests/integration/defs/perf/test_perf.py +++ b/tests/integration/defs/perf/test_perf.py @@ -100,6 +100,8 @@ "deepseek_r1_0528_fp8": "DeepSeek-R1/DeepSeek-R1-0528/", "deepseek_r1_0528_fp4": "DeepSeek-R1/DeepSeek-R1-0528-FP4/", "deepseek_r1_0528_fp4_v2": "DeepSeek-R1/DeepSeek-R1-0528-FP4-v2/", + "deepseek_r1_distill_llama_70b": + "DeepSeek-R1/DeepSeek-R1-Distill-Llama-70B/", "deepseek_v3_lite_fp8": "DeepSeek-V3-Lite/fp8", "deepseek_v3_lite_nvfp4": "DeepSeek-V3-Lite/nvfp4_moe_only", "qwen2_7b_instruct": "Qwen2-7B-Instruct", @@ -127,6 +129,10 @@ "gpt_350m_moe": "gpt2-medium", "phi_4_mini_instruct": "Phi-4-mini-instruct", "phi_4_multimodal_instruct": "multimodals/Phi-4-multimodal-instruct", + "phi_4_multimodal_instruct_fp4": + "multimodals/Phi-4-multimodal-instruct-FP4", + "phi_4_multimodal_instruct_fp8": + "multimodals/Phi-4-multimodal-instruct-FP8", "phi_4_multimodal_instruct_image": "multimodals/Phi-4-multimodal-instruct", "phi_4_multimodal_instruct_audio": "multimodals/Phi-4-multimodal-instruct", "phi_4_multimodal_instruct_fp4_image": @@ -143,7 +149,21 @@ "gpt_oss_120b_fp4": "gpt_oss/gpt-oss-120b", "gpt_oss_20b_fp4": "gpt_oss/gpt-oss-20b", "nemotron_nano_9b_v2": "NVIDIA-Nemotron-Nano-12B-v2", + "nvidia_nemotron_nano_9b_v2_nvfp4": "NVIDIA-Nemotron-Nano-9B-v2-NVFP4", "starcoder2_7b": "starcoder2-7b", + "qwen3_8b": "Qwen3/Qwen3-8B", + "qwen3_8b_fp8": "Qwen3/nvidia-Qwen3-8B-FP8", + "qwen3_8b_fp4": "Qwen3/nvidia-Qwen3-8B-NVFP4", + "qwen3_14b_fp8": "Qwen3/nvidia-Qwen3-14B-FP8", + "qwen3_14b_fp4": "Qwen3/nvidia-Qwen3-14B-NVFP4", + "qwen3_14b": "Qwen3/Qwen3-14B", + "qwen3_30b_a3b": "Qwen3/Qwen3-30B-A3B", + "qwen3_30b_a3b_fp4": "Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf", + "qwen3_32b": "Qwen3/Qwen3-32B", + "qwen3_32b_fp4": "Qwen3/nvidia-Qwen3-32B-NVFP4", + "phi_4_reasoning_plus": "Phi-4/Phi-4-reasoning-plus", + "phi_4_reasoning_plus_fp8": "nvidia-Phi-4-reasoning-plus-FP8", + "phi_4_reasoning_plus_fp4": "nvidia-Phi-4-reasoning-plus-NVFP4", } # Model PATH of HuggingFace HF_MODEL_PATH = { diff --git a/tests/integration/test_lists/qa/llm_digits_perf.txt b/tests/integration/test_lists/qa/llm_digits_perf.txt deleted file mode 100644 index a216f04c302..00000000000 --- a/tests/integration/test_lists/qa/llm_digits_perf.txt +++ /dev/null @@ -1,28 +0,0 @@ -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-streaming-bfloat16-input_output_len:128,128] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-streaming-bfloat16-input_output_len:512,128] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:128,128] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:512,128] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,128] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,2048] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] -perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,2048] - -perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-input_output_len:128,128] -perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-input_output_len:128,128] -perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-input_output_len:512,128] - -perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128] -perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:512,128] -perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] -perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] - -perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct-bench-pytorch-float16-input_output_len:128,128] -perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-input_output_len:128,128] -perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] -perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128] -perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:512,128] - -perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-input_output_len:128,128] -perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-input_output_len:128,128] diff --git a/tests/integration/test_lists/qa/llm_digits_perf.yml b/tests/integration/test_lists/qa/llm_digits_perf.yml new file mode 100644 index 00000000000..aba6fd6015a --- /dev/null +++ b/tests/integration/test_lists/qa/llm_digits_perf.yml @@ -0,0 +1,44 @@ +llm_digits_perf: +- condition: + ranges: + system_gpu_count: + gte: 1 + lte: 1 + wildcards: + gpu: + - '*gb10*' + linux_distribution_name: ubuntu* + cpu: aarch64 + terms: + backend: pytorch + tests: + - perf/test_perf.py::test_perf[gpt_oss_20b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[gpt_oss_120b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[nvidia_nemotron_nano_9b_v2_nvfp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[qwen3_8b_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[qwen3_8b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[qwen3_8b-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[qwen3_14b_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[qwen3_14b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[qwen3_14b-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[qwen3_30b_a3b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[qwen3_30b_a3b-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[llama_v4_scout_17b_16e_instruct_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[phi_4_reasoning_plus_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[phi_4_reasoning_plus_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[phi_4_reasoning_plus-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[qwen3_32b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[qwen3_32b-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[llama_v3.1_70b_instruct_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[phi_4_multimodal_instruct-bench-pytorch-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[phi_4_multimodal_instruct_fp4-bench-pytorch-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[phi_4_multimodal_instruct_fp8-bench-pytorch-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1] From a95751e8892e3d87dc62c15418c80d459d3a9895 Mon Sep 17 00:00:00 2001 From: Jenny Liu Date: Wed, 7 Jan 2026 10:07:58 +0000 Subject: [PATCH 2/2] add more vlm perf cases Signed-off-by: Jenny Liu --- tests/integration/defs/perf/test_perf.py | 9 +++++++++ tests/integration/test_lists/qa/llm_digits_perf.yml | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/tests/integration/defs/perf/test_perf.py b/tests/integration/defs/perf/test_perf.py index 0630903daea..4534d8c39d4 100644 --- a/tests/integration/defs/perf/test_perf.py +++ b/tests/integration/defs/perf/test_perf.py @@ -95,6 +95,12 @@ "ministral_8b": "Ministral-8B-Instruct-2410", "ministral_8b_fp8": "Ministral-8B-Instruct-2410-FP8", "gemma_3_1b_it": "gemma/gemma-3-1b-it", + "gemma_3_12b_it": "gemma/gemma-3-12b-it", + "gemma_3_12b_it_fp8": "gemma/gemma-3-12b-it-fp8", + "gemma_3_12b_it_fp4": "gemma/gemma-3-12b-it-FP4", + "gemma_3_27b_it": "gemma/gemma-3-27b-it", + "gemma_3_27b_it_fp8": "gemma/gemma-3-27b-it-fp8", + "gemma_3_27b_it_fp4": "gemma/gemma-3-27b-it-FP4", "deepseek_r1_fp8": "DeepSeek-R1/DeepSeek-R1", "deepseek_r1_nvfp4": "DeepSeek-R1/DeepSeek-R1-FP4", "deepseek_r1_0528_fp8": "DeepSeek-R1/DeepSeek-R1-0528/", @@ -143,6 +149,9 @@ "multimodals/Phi-4-multimodal-instruct-FP8", "phi_4_multimodal_instruct_fp8_audio": "multimodals/Phi-4-multimodal-instruct-FP8", + "qwen2_5_vl_7b_instruct": "multimodals/Qwen2.5-VL-7B-Instruct", + "qwen2_5_vl_7b_instruct_fp8": "multimodals/Qwen2.5-VL-7B-Instruct-FP8", + "qwen2_5_vl_7b_instruct_fp4": "multimodals/Qwen2.5-VL-7B-Instruct-FP4", "bielik_11b_v2.2_instruct": "Bielik-11B-v2.2-Instruct", "bielik_11b_v2.2_instruct_fp8": "Bielik-11B-v2.2-Instruct-FP8", "mistral_small_v3.1_24b": "Mistral-Small-3.1-24B-Instruct-2503", diff --git a/tests/integration/test_lists/qa/llm_digits_perf.yml b/tests/integration/test_lists/qa/llm_digits_perf.yml index aba6fd6015a..54b19a8168b 100644 --- a/tests/integration/test_lists/qa/llm_digits_perf.yml +++ b/tests/integration/test_lists/qa/llm_digits_perf.yml @@ -42,3 +42,12 @@ llm_digits_perf: - perf/test_perf.py::test_perf[phi_4_multimodal_instruct-bench-pytorch-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1] - perf/test_perf.py::test_perf[phi_4_multimodal_instruct_fp4-bench-pytorch-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1] - perf/test_perf.py::test_perf[phi_4_multimodal_instruct_fp8-bench-pytorch-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[qwen2_5_vl_7b_instruct-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[qwen2_5_vl_7b_instruct_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[qwen2_5_vl_7b_instruct_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[gemma_3_12b_it-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[gemma_3_12b_it_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[gemma_3_12b_it_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[gemma_3_27b_it-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[gemma_3_27b_it_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1] + - perf/test_perf.py::test_perf[gemma_3_27b_it_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]