Skip to content
4 changes: 3 additions & 1 deletion tests/integration/defs/perf/test_perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
"deepseek_r1_distill_qwen_32b": "DeepSeek-R1/DeepSeek-R1-Distill-Qwen-32B",
"mixtral_8x22b_v0.1": "Mixtral-8x22B-v0.1",
"mistral_7b_v0.1": "mistral-7b-v0.1",
"mistral_7b_v0.3": "Mistral-7B-Instruct-v0.3",
"deepseek_r1_fp8": "DeepSeek-R1/DeepSeek-R1",
"deepseek_r1_nvfp4": "DeepSeek-R1/DeepSeek-R1-FP4",
"deepseek_v3_lite_fp8": "DeepSeek-V3-Lite/fp8",
Expand Down Expand Up @@ -137,6 +138,7 @@
"mistral_7b_v0.1_hf": "mistralai/Mistral-7B-v0.1",
"flan_t5_base_hf": "google/flan-t5-small",
"phi_4_mini_instruct_hf": "microsoft/Phi-4-mini-instruct",
"nvila_15b": "nvidia/NVILA-15B",
}
LORA_MODEL_PATH = {
"llama_v2_13b": "llama-models-v2/chinese-llama-2-lora-13b",
Expand Down Expand Up @@ -1379,8 +1381,8 @@ def get_commands(self):
data_cmd = self.get_prepare_data_command(
engine_dir, input_len, output_len)
data_cmds.append(data_cmd)

# Construct MPI command.
mpi_cmd = []
if num_gpus > 1 and num_gpus <= 8 and not self._config.runtime == "bench":
if cpu_socket_count_gt_1():
Expand Down
17 changes: 17 additions & 0 deletions tests/integration/defs/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -1607,6 +1607,9 @@ def test_ptp_quickstart(llm_root, llm_venv):
pytest.param('Mixtral-8x7B-BF16',
'Mixtral-8x7B-Instruct-v0.1',
marks=skip_pre_blackwell),
pytest.param('Mistral-7B-Instruct-v0.3',
'Mistral-7B-Instruct-v0.3',
marks=skip_pre_blackwell),
pytest.param('Mistral-Nemo-12b-Base',
'Mistral-Nemo-Base-2407',
marks=skip_pre_blackwell),
Expand Down Expand Up @@ -2012,6 +2015,20 @@ def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path,
],
],
},
"NVILA-15B-FP16": {
"image": [
["stormy", "ocean", "waves", "clouds", "gray", "sky"],
["rock", "formation", "sunny", "sky", "clouds"],
["road", "busy", "car", "black", "blue"],
],
"video": [
["woman", "street", "night", "walking", "camera"],
[
"stunning", "earth", "space", "planet", "curvature", "dark",
"bright", "contrast", "illuminate"
],
],
},
"llava-v1.6-mistral-7b": {
"image": [
[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP8-modelopt-hf-model-hub
test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP4-modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp4]
test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1-BF16-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1]
test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-BF16-Mixtral-8x7B-Instruct-v0.1]
test_e2e.py::test_ptp_quickstart_advanced[Mistral-7B-Instruct-v0.3-Mistral-7B-Instruct-v0.3]
test_e2e.py::test_ptp_quickstart_advanced[Mistral-Nemo-12b-Base-Mistral-Nemo-Base-2407]
test_e2e.py::test_ptp_quickstart_advanced[DeepSeek-R1-Distill-Qwen-32B-DeepSeek-R1/DeepSeek-R1-Distill-Qwen-32B]

Expand Down
56 changes: 33 additions & 23 deletions tests/integration/test_lists/qa/llm_release_digits_perf.txt
Original file line number Diff line number Diff line change
@@ -1,28 +1,38 @@
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-streaming-bfloat16-input_output_len:128,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-streaming-bfloat16-input_output_len:512,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:128,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:512,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] #passed
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] #passed
perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] #passed
perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] SKIP (https://nvbugspro.nvidia.com/bug/5387445)
perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1] SKIP (https://nvbugspro.nvidia.com/bug/5387445)
perf/test_perf.py::test_perf[llama_v3.3_70b_instruct-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1] SKIP (https://nvbugspro.nvidia.com/bug/5387445)
perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] SKIP (https://nvbugspro.nvidia.com/bug/5387445)
perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1]
perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1]
perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1]
perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1]
perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1]
perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1]
perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1]
perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1]
perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-kv_cache_dtype:fp8]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-input_output_len:128,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-input_output_len:512,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:512,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:128,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,2048]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,2048]

perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-input_output_len:128,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-reqs:100-con:2]
perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-input_output_len:128,128]
perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-input_output_len:512,32]
perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-input_output_len:128,128]
perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-input_output_len:512,128]

perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-input_output_len:128,128]
perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128]
perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:512,128]
perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128]
perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:512,128]

perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct-bench-pytorch-float16-input_output_len:128,128]
perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-input_output_len:128,128]
perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:512,128]
perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-streaming-float8-input_output_len:512,32]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,32-kv_cache_dtype:fp8]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128]
perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128]
perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:512,128]

perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-input_output_len:128,128]
perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-input_output_len:128,128]
perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128-kv_cache_dtype:fp8]
perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128]
perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1]
perf/test_perf.py::test_perf[nvila_15b-bench-pytorch-bfloat16-input_output_len:128,128]