diff --git a/tests/integration/defs/perf/test_perf.py b/tests/integration/defs/perf/test_perf.py
index cdbeea45971..16f9d2776e1 100644
--- a/tests/integration/defs/perf/test_perf.py
+++ b/tests/integration/defs/perf/test_perf.py
@@ -82,6 +82,7 @@
     "deepseek_r1_distill_qwen_32b": "DeepSeek-R1/DeepSeek-R1-Distill-Qwen-32B",
     "mixtral_8x22b_v0.1": "Mixtral-8x22B-v0.1",
     "mistral_7b_v0.1": "mistral-7b-v0.1",
+    "mistral_7b_v0.3": "Mistral-7B-Instruct-v0.3",
     "deepseek_r1_fp8": "DeepSeek-R1/DeepSeek-R1",
     "deepseek_r1_nvfp4": "DeepSeek-R1/DeepSeek-R1-FP4",
     "deepseek_v3_lite_fp8": "DeepSeek-V3-Lite/fp8",
@@ -137,6 +138,7 @@
     "mistral_7b_v0.1_hf": "mistralai/Mistral-7B-v0.1",
     "flan_t5_base_hf": "google/flan-t5-small",
     "phi_4_mini_instruct_hf": "microsoft/Phi-4-mini-instruct",
+    "nvila_15b": "nvidia/NVILA-15B",
 }
 LORA_MODEL_PATH = {
     "llama_v2_13b": "llama-models-v2/chinese-llama-2-lora-13b",
diff --git a/tests/integration/defs/test_e2e.py b/tests/integration/defs/test_e2e.py
index 145c069fd93..4a8265dbad5 100644
--- a/tests/integration/defs/test_e2e.py
+++ b/tests/integration/defs/test_e2e.py
@@ -1607,6 +1607,9 @@ def test_ptp_quickstart(llm_root, llm_venv):
     pytest.param('Mixtral-8x7B-BF16',
                  'Mixtral-8x7B-Instruct-v0.1',
                  marks=skip_pre_blackwell),
+    pytest.param('Mistral-7B-Instruct-v0.3',
+                 'Mistral-7B-Instruct-v0.3',
+                 marks=skip_pre_blackwell),
     pytest.param('Mistral-Nemo-12b-Base',
                  'Mistral-Nemo-Base-2407',
                  marks=skip_pre_blackwell),
@@ -2012,6 +2015,20 @@ def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path,
             ],
         ],
     },
+    "NVILA-15B-FP16": {
+        "image": [
+            ["stormy", "ocean", "waves", "clouds", "gray", "sky"],
+            ["rock", "formation", "sunny", "sky", "clouds"],
+            ["road", "busy", "car", "black", "blue"],
+        ],
+        "video": [
+            ["woman", "street", "night", "walking", "camera"],
+            [
+                "stunning", "earth", "space", "planet", "curvature", "dark",
+                "bright", "contrast", "illuminate"
+            ],
+        ],
+    },
     "llava-v1.6-mistral-7b": {
         "image": [
             [
diff --git a/tests/integration/test_lists/qa/llm_release_digits_func.txt b/tests/integration/test_lists/qa/llm_release_digits_func.txt
index 00d0bac895e..7a0c0611bde 100644
--- a/tests/integration/test_lists/qa/llm_release_digits_func.txt
+++ b/tests/integration/test_lists/qa/llm_release_digits_func.txt
@@ -12,6 +12,7 @@
 test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP8-modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp8]
 test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP4-modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp4]
 test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1-BF16-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1]
 test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-BF16-Mixtral-8x7B-Instruct-v0.1]
+test_e2e.py::test_ptp_quickstart_advanced[Mistral-7B-Instruct-v0.3-Mistral-7B-Instruct-v0.3]
 test_e2e.py::test_ptp_quickstart_advanced[Mistral-Nemo-12b-Base-Mistral-Nemo-Base-2407]
 test_e2e.py::test_ptp_quickstart_advanced[DeepSeek-R1-Distill-Qwen-32B-DeepSeek-R1/DeepSeek-R1-Distill-Qwen-32B]
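
Note on the expected-answer tables added to test_e2e.py above: the new "NVILA-15B-FP16" entry follows the same shape as the existing models, one keyword list per image/video prompt. As a rough illustration of how such lists are typically consumed, a minimal Python sketch, assuming the harness just checks that enough of the expected words occur in the generated text (the helper name and the 50% threshold here are hypothetical, not the harness's actual API):

# Hypothetical keyword check, assuming the quickstart test scans the
# generated caption for the expected words listed per prompt above.
def matches_expected(output: str, expected_words: list[str],
                     min_ratio: float = 0.5) -> bool:
    """True if at least min_ratio of the expected words occur in output."""
    text = output.lower()
    hits = sum(1 for word in expected_words if word.lower() in text)
    return hits >= min_ratio * len(expected_words)

# Example against the first NVILA-15B image keyword list:
assert matches_expected(
    "A stormy ocean with large waves under a gray, cloudy sky.",
    ["stormy", "ocean", "waves", "clouds", "gray", "sky"])
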
diff --git a/tests/integration/test_lists/qa/llm_release_digits_perf.txt b/tests/integration/test_lists/qa/llm_release_digits_perf.txt
index a216f04c302..e7292fa8202 100644
--- a/tests/integration/test_lists/qa/llm_release_digits_perf.txt
+++ b/tests/integration/test_lists/qa/llm_release_digits_perf.txt
@@ -1,28 +1,38 @@
-perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-streaming-bfloat16-input_output_len:128,128]
-perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-streaming-bfloat16-input_output_len:512,128]
-perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:128,128]
-perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:512,128]
+perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] #passed
+perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] #passed
+perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] #passed
+perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] SKIP (https://nvbugspro.nvidia.com/bug/5387445)
+perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1] SKIP (https://nvbugspro.nvidia.com/bug/5387445)
+perf/test_perf.py::test_perf[llama_v3.3_70b_instruct-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1] SKIP (https://nvbugspro.nvidia.com/bug/5387445)
+perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1] SKIP (https://nvbugspro.nvidia.com/bug/5387445)
+perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1]
+perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1]
+perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1]
+perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1]
+perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1]
+perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1]
+perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-maxbs:1-maxnt:320-input_output_len:128,128-reqs:32-con:1]
+perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1]
+perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1]
+perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1]
+perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-kv_cache_dtype:fp8]
+perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-input_output_len:128,128]
+perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-input_output_len:512,128]
+perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:512,128]
+perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:128,128]
 perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128]
 perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,128]
-perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,2048]
-perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128]
-perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,128]
-perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,2048]
-
-perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-input_output_len:128,128]
+perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-reqs:100-con:2]
+perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-input_output_len:128,128]
+perf/test_perf.py::test_perf[mistral_7b_v0.3-bench-pytorch-float16-input_output_len:512,32]
 perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-input_output_len:128,128]
-perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-input_output_len:512,128]
-
+perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-input_output_len:128,128]
 perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128]
-perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:512,128]
-perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128]
-perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:512,128]
-
-perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct-bench-pytorch-float16-input_output_len:128,128]
-perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-input_output_len:128,128]
-perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:512,128]
+perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-streaming-float8-input_output_len:512,32]
+perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:512,32-kv_cache_dtype:fp8]
+perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128]
 perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128]
-perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:512,128]
-
-perf/test_perf.py::test_perf[mistral_nemo_12b_base-bench-pytorch-float16-input_output_len:128,128]
-perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-float16-input_output_len:128,128]
+perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128-kv_cache_dtype:fp8]
+perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128]
+perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-float4-maxbs:1-maxnt:3000-input_output_len:2048,128-reqs:32-con:1]
+perf/test_perf.py::test_perf[nvila_15b-bench-pytorch-bfloat16-input_output_len:128,128]
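
A note on the test-name grammar used throughout these lists: each bracketed id is a hyphen-separated sequence of a model key (resolved through the model-path tables in test_perf.py), bare flags such as bench, pytorch, and streaming, a dtype, and key:value fields. A minimal parsing sketch, under the assumption that maxbs means max batch size, maxnt max token count, reqs the number of requests, and con the client concurrency; this is illustrative only, not test_perf.py's actual parser:

# Illustrative parser for the hyphen-separated perf test names above.
# Field meanings (maxbs, maxnt, reqs, con) are assumptions, not taken
# from the harness.
def parse_perf_test_name(name: str) -> dict:
    parts = name.split("-")
    config = {"model": parts[0], "flags": []}
    for part in parts[1:]:
        if ":" in part:
            key, value = part.split(":", 1)
            config[key] = value
        else:
            config["flags"].append(part)  # e.g. bench, pytorch, streaming
    return config

print(parse_perf_test_name(
    "mistral_7b_v0.3-bench-pytorch-float16-maxbs:1-maxnt:3000"
    "-input_output_len:2048,128-reqs:32-con:1"))
# {'model': 'mistral_7b_v0.3', 'flags': ['bench', 'pytorch', 'float16'],
#  'maxbs': '1', 'maxnt': '3000', 'input_output_len': '2048,128',
#  'reqs': '32', 'con': '1'}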