From 4b344a7c6d22ddc15f04031d2e64cc5b19d8f482 Mon Sep 17 00:00:00 2001
From: Jenny Liu <JennyLiu-nv+JennyLiu@users.noreply.github.com>
Date: Wed, 7 Jan 2026 09:29:06 +0000
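Subject: [PATCH 1/2] Add DGX-Spark performance QA test cases for single node

Replace the plain-text test list llm_digits_perf.txt with
llm_digits_perf.yml, whose cases are gated on a single-GPU GB10,
aarch64, Ubuntu system using the PyTorch backend, and add model path
entries to tests/integration/defs/perf/test_perf.py.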

Signed-off-by: Jenny Liu <JennyLiu-nv+JennyLiu@users.noreply.github.com>
---
 tests/integration/defs/perf/test_perf.py      | 20 +++++++++
 .../test_lists/qa/llm_digits_perf.txt         | 28 ------------
 .../test_lists/qa/llm_digits_perf.yml         | 44 +++++++++++++++++++
 3 files changed, 64 insertions(+), 28 deletions(-)
 delete mode 100644 tests/integration/test_lists/qa/llm_digits_perf.txt
 create mode 100644 tests/integration/test_lists/qa/llm_digits_perf.yml

diff --git a/tests/integration/defs/perf/test_perf.py b/tests/integration/defs/perf/test_perf.py
index df4c72a8390..0630903daea 100644
--- a/tests/integration/defs/perf/test_perf.py
+++ b/tests/integration/defs/perf/test_perf.py
@@ -100,6 +100,8 @@
     "deepseek_r1_0528_fp8": "DeepSeek-R1/DeepSeek-R1-0528/",
     "deepseek_r1_0528_fp4": "DeepSeek-R1/DeepSeek-R1-0528-FP4/",
     "deepseek_r1_0528_fp4_v2": "DeepSeek-R1/DeepSeek-R1-0528-FP4-v2/",
+    "deepseek_r1_distill_llama_70b":
+    "DeepSeek-R1/DeepSeek-R1-Distill-Llama-70B/",
     "deepseek_v3_lite_fp8": "DeepSeek-V3-Lite/fp8",
     "deepseek_v3_lite_nvfp4": "DeepSeek-V3-Lite/nvfp4_moe_only",
     "qwen2_7b_instruct": "Qwen2-7B-Instruct",
@@ -127,6 +129,10 @@
     "gpt_350m_moe": "gpt2-medium",
     "phi_4_mini_instruct": "Phi-4-mini-instruct",
     "phi_4_multimodal_instruct": "multimodals/Phi-4-multimodal-instruct",
+    "phi_4_multimodal_instruct_fp4":
+    "multimodals/Phi-4-multimodal-instruct-FP4",
+    "phi_4_multimodal_instruct_fp8":
+    "multimodals/Phi-4-multimodal-instruct-FP8",
     "phi_4_multimodal_instruct_image": "multimodals/Phi-4-multimodal-instruct",
     "phi_4_multimodal_instruct_audio": "multimodals/Phi-4-multimodal-instruct",
     "phi_4_multimodal_instruct_fp4_image":
@@ -143,7 +149,21 @@
     "gpt_oss_120b_fp4": "gpt_oss/gpt-oss-120b",
     "gpt_oss_20b_fp4": "gpt_oss/gpt-oss-20b",
     "nemotron_nano_9b_v2": "NVIDIA-Nemotron-Nano-12B-v2",
+    "nvidia_nemotron_nano_9b_v2_nvfp4": "NVIDIA-Nemotron-Nano-9B-v2-NVFP4",
     "starcoder2_7b": "starcoder2-7b",
+    "qwen3_8b": "Qwen3/Qwen3-8B",
+    "qwen3_8b_fp8": "Qwen3/nvidia-Qwen3-8B-FP8",
+    "qwen3_8b_fp4": "Qwen3/nvidia-Qwen3-8B-NVFP4",
+    "qwen3_14b_fp8": "Qwen3/nvidia-Qwen3-14B-FP8",
+    "qwen3_14b_fp4": "Qwen3/nvidia-Qwen3-14B-NVFP4",
+    "qwen3_14b": "Qwen3/Qwen3-14B",
+    "qwen3_30b_a3b": "Qwen3/Qwen3-30B-A3B",
+    "qwen3_30b_a3b_fp4": "Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf",
+    "qwen3_32b": "Qwen3/Qwen3-32B",
+    "qwen3_32b_fp4": "Qwen3/nvidia-Qwen3-32B-NVFP4",
+    "phi_4_reasoning_plus": "Phi-4/Phi-4-reasoning-plus",
+    "phi_4_reasoning_plus_fp8": "nvidia-Phi-4-reasoning-plus-FP8",
+    "phi_4_reasoning_plus_fp4": "nvidia-Phi-4-reasoning-plus-NVFP4",
 }
 # Model PATH of HuggingFace
 HF_MODEL_PATH = {
diff --git a/tests/integration/test_lists/qa/llm_digits_perf.txt b/tests/integration/test_lists/qa/llm_digits_perf.txt
deleted file mode 100644
index a216f04c302..00000000000
--- a/tests/integration/test_lists/qa/llm_digits_perf.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-streaming-bfloat16-input_output_len:128,128]
-perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-streaming-bfloat16-input_output_len:512,128]
-perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:128,128]
-perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:512,128]
-perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128]
-perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,128]
-perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,2048]
-perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128]
-perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,128]
-perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,2048]
-
-perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-input_output_len:128,128]
-perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-input_output_len:128,128]
-perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-input_output_len:512,128]
-
-perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128]
-perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:512,128]
-perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128]
-perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:512,128]
-
-perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct-bench-pytorch-float16-input_output_len:128,128]
-perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-input_output_len:128,128]
-perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:512,128]
-perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128]
-perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:512,128]
-
-perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-input_output_len:128,128]
-perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-input_output_len:128,128]
diff --git a/tests/integration/test_lists/qa/llm_digits_perf.yml b/tests/integration/test_lists/qa/llm_digits_perf.yml
new file mode 100644
index 00000000000..aba6fd6015a
--- /dev/null
+++ b/tests/integration/test_lists/qa/llm_digits_perf.yml
@@ -0,0 +1,44 @@
+llm_digits_perf:
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*gb10*'
+      linux_distribution_name: ubuntu*
+      cpu: aarch64
+    terms:
+      backend: pytorch
+  tests:
+    - perf/test_perf.py::test_perf[gpt_oss_20b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[gpt_oss_120b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[nvidia_nemotron_nano_9b_v2_nvfp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[qwen3_8b_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[qwen3_8b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[qwen3_8b-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[qwen3_14b_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[qwen3_14b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[qwen3_14b-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[qwen3_30b_a3b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[qwen3_30b_a3b-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[llama_v4_scout_17b_16e_instruct_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[phi_4_reasoning_plus_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[phi_4_reasoning_plus_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[phi_4_reasoning_plus-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[qwen3_32b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[qwen3_32b-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[llama_v3.1_70b_instruct_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[phi_4_multimodal_instruct-bench-pytorch-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[phi_4_multimodal_instruct_fp4-bench-pytorch-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[phi_4_multimodal_instruct_fp8-bench-pytorch-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]

From a95751e8892e3d87dc62c15418c80d459d3a9895 Mon Sep 17 00:00:00 2001
From: Jenny Liu <JennyLiu-nv+JennyLiu@users.noreply.github.com>
Date: Wed, 7 Jan 2026 10:07:58 +0000
Subject: [PATCH 2/2] Add more VLM perf cases (Qwen2.5-VL, Gemma 3)

Signed-off-by: Jenny Liu <JennyLiu-nv+JennyLiu@users.noreply.github.com>
---
 tests/integration/defs/perf/test_perf.py            | 9 +++++++++
 tests/integration/test_lists/qa/llm_digits_perf.yml | 9 +++++++++
 2 files changed, 18 insertions(+)

diff --git a/tests/integration/defs/perf/test_perf.py b/tests/integration/defs/perf/test_perf.py
index 0630903daea..4534d8c39d4 100644
--- a/tests/integration/defs/perf/test_perf.py
+++ b/tests/integration/defs/perf/test_perf.py
@@ -95,6 +95,12 @@
     "ministral_8b": "Ministral-8B-Instruct-2410",
     "ministral_8b_fp8": "Ministral-8B-Instruct-2410-FP8",
     "gemma_3_1b_it": "gemma/gemma-3-1b-it",
+    "gemma_3_12b_it": "gemma/gemma-3-12b-it",
+    "gemma_3_12b_it_fp8": "gemma/gemma-3-12b-it-fp8",
+    "gemma_3_12b_it_fp4": "gemma/gemma-3-12b-it-FP4",
+    "gemma_3_27b_it": "gemma/gemma-3-27b-it",
+    "gemma_3_27b_it_fp8": "gemma/gemma-3-27b-it-fp8",
+    "gemma_3_27b_it_fp4": "gemma/gemma-3-27b-it-FP4",
     "deepseek_r1_fp8": "DeepSeek-R1/DeepSeek-R1",
     "deepseek_r1_nvfp4": "DeepSeek-R1/DeepSeek-R1-FP4",
     "deepseek_r1_0528_fp8": "DeepSeek-R1/DeepSeek-R1-0528/",
@@ -143,6 +149,9 @@
     "multimodals/Phi-4-multimodal-instruct-FP8",
     "phi_4_multimodal_instruct_fp8_audio":
     "multimodals/Phi-4-multimodal-instruct-FP8",
+    "qwen2_5_vl_7b_instruct": "multimodals/Qwen2.5-VL-7B-Instruct",
+    "qwen2_5_vl_7b_instruct_fp8": "multimodals/Qwen2.5-VL-7B-Instruct-FP8",
+    "qwen2_5_vl_7b_instruct_fp4": "multimodals/Qwen2.5-VL-7B-Instruct-FP4",
     "bielik_11b_v2.2_instruct": "Bielik-11B-v2.2-Instruct",
     "bielik_11b_v2.2_instruct_fp8": "Bielik-11B-v2.2-Instruct-FP8",
     "mistral_small_v3.1_24b": "Mistral-Small-3.1-24B-Instruct-2503",
diff --git a/tests/integration/test_lists/qa/llm_digits_perf.yml b/tests/integration/test_lists/qa/llm_digits_perf.yml
index aba6fd6015a..54b19a8168b 100644
--- a/tests/integration/test_lists/qa/llm_digits_perf.yml
+++ b/tests/integration/test_lists/qa/llm_digits_perf.yml
@@ -42,3 +42,12 @@ llm_digits_perf:
     - perf/test_perf.py::test_perf[phi_4_multimodal_instruct-bench-pytorch-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
     - perf/test_perf.py::test_perf[phi_4_multimodal_instruct_fp4-bench-pytorch-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
     - perf/test_perf.py::test_perf[phi_4_multimodal_instruct_fp8-bench-pytorch-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[qwen2_5_vl_7b_instruct-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[qwen2_5_vl_7b_instruct_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[qwen2_5_vl_7b_instruct_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[gemma_3_12b_it-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[gemma_3_12b_it_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[gemma_3_12b_it_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[gemma_3_27b_it-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[gemma_3_27b_it_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+    - perf/test_perf.py::test_perf[gemma_3_27b_it_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]