Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions tests/integration/defs/perf/test_perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,19 @@
"ministral_8b": "Ministral-8B-Instruct-2410",
"ministral_8b_fp8": "Ministral-8B-Instruct-2410-FP8",
"gemma_3_1b_it": "gemma/gemma-3-1b-it",
"gemma_3_12b_it": "gemma/gemma-3-12b-it",
"gemma_3_12b_it_fp8": "gemma/gemma-3-12b-it-fp8",
"gemma_3_12b_it_fp4": "gemma/gemma-3-12b-it-FP4",
"gemma_3_27b_it": "gemma/gemma-3-27b-it",
"gemma_3_27b_it_fp8": "gemma/gemma-3-27b-it-fp8",
"gemma_3_27b_it_fp4": "gemma/gemma-3-27b-it-FP4",
"deepseek_r1_fp8": "DeepSeek-R1/DeepSeek-R1",
"deepseek_r1_nvfp4": "DeepSeek-R1/DeepSeek-R1-FP4",
"deepseek_r1_0528_fp8": "DeepSeek-R1/DeepSeek-R1-0528/",
"deepseek_r1_0528_fp4": "DeepSeek-R1/DeepSeek-R1-0528-FP4/",
"deepseek_r1_0528_fp4_v2": "DeepSeek-R1/DeepSeek-R1-0528-FP4-v2/",
"deepseek_r1_distill_llama_70b":
"DeepSeek-R1/DeepSeek-R1-Distill-Llama-70B/",
"deepseek_v3_lite_fp8": "DeepSeek-V3-Lite/fp8",
"deepseek_v3_lite_nvfp4": "DeepSeek-V3-Lite/nvfp4_moe_only",
"qwen2_7b_instruct": "Qwen2-7B-Instruct",
Expand Down Expand Up @@ -127,6 +135,10 @@
"gpt_350m_moe": "gpt2-medium",
"phi_4_mini_instruct": "Phi-4-mini-instruct",
"phi_4_multimodal_instruct": "multimodals/Phi-4-multimodal-instruct",
"phi_4_multimodal_instruct_fp4":
"multimodals/Phi-4-multimodal-instruct-FP4",
"phi_4_multimodal_instruct_fp8":
"multimodals/Phi-4-multimodal-instruct-FP8",
"phi_4_multimodal_instruct_image": "multimodals/Phi-4-multimodal-instruct",
"phi_4_multimodal_instruct_audio": "multimodals/Phi-4-multimodal-instruct",
"phi_4_multimodal_instruct_fp4_image":
Expand All @@ -137,13 +149,30 @@
"multimodals/Phi-4-multimodal-instruct-FP8",
"phi_4_multimodal_instruct_fp8_audio":
"multimodals/Phi-4-multimodal-instruct-FP8",
"qwen2_5_vl_7b_instruct": "multimodals/Qwen2.5-VL-7B-Instruct",
"qwen2_5_vl_7b_instruct_fp8": "multimodals/Qwen2.5-VL-7B-Instruct-FP8",
"qwen2_5_vl_7b_instruct_fp4": "multimodals/Qwen2.5-VL-7B-Instruct-FP4",
"bielik_11b_v2.2_instruct": "Bielik-11B-v2.2-Instruct",
"bielik_11b_v2.2_instruct_fp8": "Bielik-11B-v2.2-Instruct-FP8",
"mistral_small_v3.1_24b": "Mistral-Small-3.1-24B-Instruct-2503",
"gpt_oss_120b_fp4": "gpt_oss/gpt-oss-120b",
"gpt_oss_20b_fp4": "gpt_oss/gpt-oss-20b",
"nemotron_nano_9b_v2": "NVIDIA-Nemotron-Nano-12B-v2",
"nvidia_nemotron_nano_9b_v2_nvfp4": "NVIDIA-Nemotron-Nano-9B-v2-NVFP4",
"starcoder2_7b": "starcoder2-7b",
"qwen3_8b": "Qwen3/Qwen3-8B",
"qwen3_8b_fp8": "Qwen3/nvidia-Qwen3-8B-FP8",
"qwen3_8b_fp4": "Qwen3/nvidia-Qwen3-8B-NVFP4",
"qwen3_14b_fp8": "Qwen3/nvidia-Qwen3-14B-FP8",
"qwen3_14b_fp4": "Qwen3/nvidia-Qwen3-14B-NVFP4",
"qwen3_14b": "Qwen3/Qwen3-14B",
"qwen3_30b_a3b": "Qwen3/Qwen3-30B-A3B",
"qwen3_30b_a3b_fp4": "Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf",
"qwen3_32b": "Qwen3/Qwen3-32B",
"qwen3_32b_fp4": "Qwen3/nvidia-Qwen3-32B-NVFP4",
"phi_4_reasoning_plus": "Phi-4/Phi-4-reasoning-plus",
"phi_4_reasoning_plus_fp8": "nvidia-Phi-4-reasoning-plus-FP8",
"phi_4_reasoning_plus_fp4": "nvidia-Phi-4-reasoning-plus-NVFP4",
}
# Model paths on HuggingFace
HF_MODEL_PATH = {
Expand Down
28 changes: 0 additions & 28 deletions tests/integration/test_lists/qa/llm_digits_perf.txt

This file was deleted.

53 changes: 53 additions & 0 deletions tests/integration/test_lists/qa/llm_digits_perf.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Test list `llm_digits_perf`: one condition block followed by the perf test
# IDs (all from perf/test_perf.py::test_perf) that it gates.
# NOTE(review): leading indentation is not preserved in this view, so the
# exact nesting of the keys below should be confirmed against the real file.
llm_digits_perf:
# The condition lists: a system_gpu_count range of gte 1 / lte 1, a gpu
# wildcard '*gb10*', linux_distribution_name ubuntu*, cpu aarch64, and
# backend pytorch.
- condition:
ranges:
system_gpu_count:
gte: 1
lte: 1
wildcards:
gpu:
- '*gb10*'
linux_distribution_name: ubuntu*
cpu: aarch64
terms:
backend: pytorch
# Every entry below uses maxbs:1, input_output_len:2048,128, reqs:1 and con:1;
# entries differ only in the model label, the dtype token
# (float4 / float8 / bfloat16) and whether `streaming` is present.
tests:
- perf/test_perf.py::test_perf[gpt_oss_20b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[gpt_oss_120b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[nvidia_nemotron_nano_9b_v2_nvfp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[qwen3_8b_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[qwen3_8b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[qwen3_8b-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[qwen3_14b_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[qwen3_14b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[qwen3_14b-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[qwen3_30b_a3b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[qwen3_30b_a3b-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[llama_v4_scout_17b_16e_instruct_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[phi_4_reasoning_plus_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[phi_4_reasoning_plus_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[phi_4_reasoning_plus-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[qwen3_32b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[qwen3_32b-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[llama_v3.1_70b_instruct_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
# NOTE(review): the three phi_4_multimodal_instruct entries below omit the
# `streaming` token that every other entry carries — confirm this is intended.
- perf/test_perf.py::test_perf[phi_4_multimodal_instruct-bench-pytorch-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[phi_4_multimodal_instruct_fp4-bench-pytorch-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[phi_4_multimodal_instruct_fp8-bench-pytorch-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[qwen2_5_vl_7b_instruct-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[qwen2_5_vl_7b_instruct_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[qwen2_5_vl_7b_instruct_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[gemma_3_12b_it-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[gemma_3_12b_it_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[gemma_3_12b_it_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[gemma_3_27b_it-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[gemma_3_27b_it_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
- perf/test_perf.py::test_perf[gemma_3_27b_it_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]