Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion tests/integration/defs/perf/test_perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@
"nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1",
"llama_v3.3_nemotron_super_49b_fp8":
"nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1-FP8",
"llama_v3.3_nemotron_super_49b_v1.5_fp8":
"nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1_5-FP8",
"llama_v3.1_nemotron_ultra_253b":
"nemotron-nas/Llama-3_1-Nemotron-Ultra-253B-v1",
"llama_v3.1_nemotron_ultra_253b_fp8":
Expand All @@ -90,11 +92,16 @@
"modelopt-hf-model-hub/Mixtral-8x7B-Instruct-v0.1-fp4",
"mistral_nemo_12b_base": "Mistral-Nemo-Base-2407",
"deepseek_r1_distill_qwen_32b": "DeepSeek-R1/DeepSeek-R1-Distill-Qwen-32B",
"deepseek_r1_distill_llama_70b":
"DeepSeek-R1/DeepSeek-R1-Distill-Llama-70B/",
"mixtral_8x22b_v0.1": "Mixtral-8x22B-v0.1",
"mistral_7b_v0.1": "mistral-7b-v0.1",
"ministral_8b": "Ministral-8B-Instruct-2410",
"ministral_8b_fp8": "Ministral-8B-Instruct-2410-FP8",
"gemma_3_1b_it": "gemma/gemma-3-1b-it",
"gemma_3_27b_it": "gemma/gemma-3-27b-it",
"gemma_3_27b_it_fp8": "gemma/gemma-3-27b-it-fp8",
"gemma_3_27b_it_fp4": "gemma/gemma-3-27b-it-FP4",
"deepseek_r1_fp8": "DeepSeek-R1/DeepSeek-R1",
"deepseek_r1_nvfp4": "DeepSeek-R1/DeepSeek-R1-FP4",
"deepseek_r1_0528_fp8": "DeepSeek-R1/DeepSeek-R1-0528/",
Expand All @@ -106,8 +113,21 @@
"qwen_14b_chat": "Qwen-14B-Chat",
"qwen3_0.6b": "Qwen3/Qwen3-0.6B",
"qwen3_4b_eagle3": "Qwen3/Qwen3-4B",
"qwen3_8b": "Qwen3/Qwen3-8B",
"qwen3_8b_fp8": "Qwen3/nvidia-Qwen3-8B-FP8",
"qwen3_8b_fp4": "Qwen3/nvidia-Qwen3-8B-NVFP4",
"qwen3_14b": "Qwen3/Qwen3-14B",
"qwen3_14b_fp8": "Qwen3/nvidia-Qwen3-14B-FP8",
"qwen3_14b_fp4": "Qwen3/nvidia-Qwen3-14B-NVFP4",
"qwen3_30b_a3b": "Qwen3/Qwen3-30B-A3B",
"qwen3_30b_a3b_fp4": "Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf",
"qwen3_32b": "Qwen3/Qwen3-32B",
"qwen3_32b_fp4": "Qwen3/nvidia-Qwen3-32B-NVFP4",
"qwen3_235b_a22b_fp8": "Qwen3/saved_models_Qwen3-235B-A22B_fp8_hf",
"qwen3_235b_a22b_fp4": "Qwen3/saved_models_Qwen3-235B-A22B_nvfp4_hf",
"qwen2_5_vl_7b_instruct": "multimodals/Qwen2.5-VL-7B-Instruct",
"qwen2_5_vl_7b_instruct_fp8": "multimodals/Qwen2.5-VL-7B-Instruct-FP8",
"qwen2_5_vl_7b_instruct_fp4": "multimodals/Qwen2.5-VL-7B-Instruct-FP4",
"starcoder2_3b": "starcoder2-3b",
"starcoder2_7b": "starcoder2-7b",
"starcoder2_15b": "starcoder2-15b",
Expand All @@ -126,9 +146,14 @@
"gpt_20b": "gpt-neox-20b",
"gpt_350m_moe": "gpt2-medium",
"phi_4_mini_instruct": "Phi-4-mini-instruct",
"phi_4_reasoning_plus": "Phi-4-reasoning-plus",
"phi_4_reasoning_plus_fp8": "nvidia-Phi-4-reasoning-plus-FP8",
"phi_4_reasoning_plus_fp4": "nvidia-Phi-4-reasoning-plus-NVFP4",
"phi_4_multimodal_instruct": "multimodals/Phi-4-multimodal-instruct",
"phi_4_multimodal_instruct_image": "multimodals/Phi-4-multimodal-instruct",
"phi_4_multimodal_instruct_audio": "multimodals/Phi-4-multimodal-instruct",
"phi_4_multimodal_instruct_fp4":
"multimodals/Phi-4-multimodal-instruct-FP4",
"phi_4_multimodal_instruct_fp4_image":
"multimodals/Phi-4-multimodal-instruct-FP4",
"phi_4_multimodal_instruct_fp4_audio":
Expand All @@ -137,12 +162,15 @@
"multimodals/Phi-4-multimodal-instruct-FP8",
"phi_4_multimodal_instruct_fp8_audio":
"multimodals/Phi-4-multimodal-instruct-FP8",
"phi_4_multimodal_instruct_fp8":
"multimodals/Phi-4-multimodal-instruct-FP8",
"bielik_11b_v2.2_instruct": "Bielik-11B-v2.2-Instruct",
"bielik_11b_v2.2_instruct_fp8": "Bielik-11B-v2.2-Instruct-FP8",
"mistral_small_v3.1_24b": "Mistral-Small-3.1-24B-Instruct-2503",
"gpt_oss_120b_fp4": "gpt_oss/gpt-oss-120b",
"gpt_oss_20b_fp4": "gpt_oss/gpt-oss-20b",
"nemotron_nano_9b_v2": "NVIDIA-Nemotron-Nano-12B-v2",
"nemotron_nano_12b_v2": "NVIDIA-Nemotron-Nano-12B-v2",
"nvidia_nemotron_nano_9b_v2_nvfp4": "NVIDIA-Nemotron-Nano-9B-v2-NVFP4",
"starcoder2_7b": "starcoder2-7b",
}
# Model PATH of HuggingFace
Expand Down
53 changes: 50 additions & 3 deletions tests/integration/defs/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -1902,11 +1902,47 @@ def test_ptp_quickstart(llm_root, llm_venv):
marks=skip_pre_blackwell),
pytest.param(
'GPT-OSS-120B', 'gpt_oss/gpt-oss-120b', marks=skip_pre_blackwell),
("Llama3.1-8B-bf16-instruct", "llama-3.1-model/Llama-3.1-8B-Instruct"),
pytest.param('Llama3.1-8B-FP4',
'modelopt-hf-model-hub/Llama-3.1-8B-Instruct-fp4',
marks=skip_pre_blackwell),
pytest.param(
'Qwen3-8b-fp8', 'Qwen3/nvidia-Qwen3-8B-FP8', marks=skip_pre_hopper),
pytest.param('Qwen3-8b-nvfp4',
'Qwen3/nvidia-Qwen3-8B-NVFP4',
marks=skip_pre_blackwell),
("Qwen3-8B-bf16", "Qwen3/Qwen3-8B"),
pytest.param(
'Qwen3-14b-fp8', 'Qwen3/nvidia-Qwen3-14B-FP8', marks=skip_pre_hopper),
pytest.param('Qwen3-14b-nvfp4',
'Qwen3/nvidia-Qwen3-14B-NVFP4',
marks=skip_pre_blackwell),
("Qwen3-14B-bf16", "Qwen3/Qwen3-14B"),
pytest.param('Qwen3-32b-nvfp4',
'Qwen3/nvidia-Qwen3-32B-NVFP4',
marks=skip_pre_blackwell),
("Qwen3-32B-bf16", "Qwen3/Qwen3-32B"),
pytest.param('Phi4-Reasoning-Plus-fp8',
'nvidia-Phi-4-reasoning-plus-FP8',
marks=skip_pre_hopper),
pytest.param('Phi4-Reasoning-Plus-nvfp4',
'nvidia-Phi-4-reasoning-plus-NVFP4',
marks=skip_pre_blackwell),
("Phi-4-reasoning-plus-bf16", "Phi-4-reasoning-plus"),
pytest.param('Nemotron-Super-49B-v1.5-FP8',
'nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1_5-FP8',
marks=skip_pre_hopper),
pytest.param('Llama-4-Scout-17B-16E-FP4',
'llama4-models/Llama-4-Scout-17B-16E-Instruct-FP4',
marks=skip_pre_blackwell),
pytest.param('Nemotron-Nano-9B-v2-nvfp4',
'NVIDIA-Nemotron-Nano-9B-v2-NVFP4',
marks=skip_pre_blackwell),
])
def test_ptp_quickstart_advanced(llm_root, llm_venv, model_name, model_path):
print(f"Testing {model_name}.")
example_root = Path(os.path.join(llm_root, "examples", "llm-api"))
if model_name == "Nemotron-H-8B":
if model_name in ("Nemotron-H-8B", "Nemotron-Nano-9B-v2-nvfp4"):
llm_venv.run_cmd([
str(example_root / "quickstart_advanced.py"),
"--disable_kv_cache_reuse",
Expand Down Expand Up @@ -1934,7 +1970,7 @@ def test_ptp_quickstart_advanced(llm_root, llm_venv, model_name, model_path):
]
if "Qwen3" in model_name:
cmds.append(f"--kv_cache_fraction=0.6")
if "Llama3.1-70B" in model_name:
if "Llama3.1-70B" in model_name or "Llama3.3-70B" in model_name:
cmds.append(f"--max_num_tokens=1024")
llm_venv.run_cmd(cmds, stdout=running_log)
if model_name in mapping:
Expand Down Expand Up @@ -2053,11 +2089,22 @@ def test_ptp_quickstart_advanced_deepseek_multi_nodes(llm_root, llm_venv,
@pytest.mark.parametrize("model_name,model_path,eagle_model_path", [
("Llama-3.1-8b-Instruct", "llama-3.1-model/Llama-3.1-8B-Instruct",
"EAGLE3-LLaMA3.1-Instruct-8B"),
pytest.param('GPT-OSS-120B-Eagle3',
'gpt_oss/gpt-oss-120b',
'gpt_oss/gpt-oss-120b-Eagle3',
marks=skip_pre_blackwell),
])
def test_ptp_quickstart_advanced_eagle3(llm_root, llm_venv, model_name,
model_path, eagle_model_path):
print(f"Testing {model_name}.")
example_root = Path(os.path.join(llm_root, "examples", "llm-api"))

# Set expected memory based on model size
if "GPT-OSS-120B" in model_name:
expected_mem = [106.71, 0, 0, 0] # Memory for 120B model with Eagle3
else:
expected_mem = [25.2, 0, 0, 0] # Memory for Llama-3.1-8B with Eagle3

with tempfile.NamedTemporaryFile(mode='w+t',
suffix=f".{model_name}.log",
dir="./",
Expand All @@ -2077,7 +2124,7 @@ def test_ptp_quickstart_advanced_eagle3(llm_root, llm_venv, model_name,
"--disable_overlap_scheduler",
],
stdout=running_log)
_check_mem_usage(running_log, [25.2, 0, 0, 0])
_check_mem_usage(running_log, expected_mem)


@pytest.mark.parametrize("model_name,model_path,eagle_model_path", [
Expand Down
39 changes: 39 additions & 0 deletions tests/integration/test_lists/qa/llm_digits_core.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP8-llama-3.1-model/Llama-3.1-8B-Instruct-FP8]
test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP4-modelopt-hf-model-hub/Llama-3.1-8B-Instruct-fp4]
test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-bf16-instruct-llama-3.1-model/Llama-3.1-8B-Instruct]
test_e2e.py::test_ptp_quickstart_advanced[GPT-OSS-20B-gpt_oss/gpt-oss-20b]
test_e2e.py::test_ptp_quickstart_advanced[GPT-OSS-120B-gpt_oss/gpt-oss-120b]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-8b-fp8-Qwen3/nvidia-Qwen3-8B-FP8]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-8b-nvfp4-Qwen3/nvidia-Qwen3-8B-NVFP4]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-8B-bf16-Qwen3/Qwen3-8B]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-14b-fp8-Qwen3/nvidia-Qwen3-14B-FP8]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-14b-nvfp4-Qwen3/nvidia-Qwen3-14B-NVFP4]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-14B-bf16-Qwen3/Qwen3-14B]
test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image]
test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-audio]
test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image_audio]
test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp4-multimodals/Phi-4-multimodal-instruct-FP4-image]
test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp4-multimodals/Phi-4-multimodal-instruct-FP4-audio]
test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp4-multimodals/Phi-4-multimodal-instruct-FP4-image_audio]
test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp8-multimodals/Phi-4-multimodal-instruct-FP8-image]
test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp8-multimodals/Phi-4-multimodal-instruct-FP8-audio]
test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp8-multimodals/Phi-4-multimodal-instruct-FP8-image_audio]
test_e2e.py::test_ptp_quickstart_advanced[Phi4-Reasoning-Plus-fp8-nvidia-Phi-4-reasoning-plus-FP8]
test_e2e.py::test_ptp_quickstart_advanced[Phi4-Reasoning-Plus-nvfp4-nvidia-Phi-4-reasoning-plus-NVFP4]
test_e2e.py::test_ptp_quickstart_advanced[Phi-4-reasoning-plus-bf16-Phi-4-reasoning-plus]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-32B-bf16-Qwen3/Qwen3-32B]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-32b-nvfp4-Qwen3/nvidia-Qwen3-32B-NVFP4]
test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Nano-9B-v2-nvfp4-NVIDIA-Nemotron-Nano-9B-v2-NVFP4]
test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1.5-FP8-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1_5-FP8]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B-Qwen3/Qwen3-30B-A3B]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf]
test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP8-modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp8]
test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP4-modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp4]

accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4
accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=False]
accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=True]
accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_fp8
accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_fp4
accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype
59 changes: 39 additions & 20 deletions tests/integration/test_lists/qa/llm_digits_func.txt
Original file line number Diff line number Diff line change
@@ -1,25 +1,44 @@
test_e2e.py::test_ptp_quickstart
test_e2e.py::test_ptp_quickstart_advanced_mixed_precision #Llama-3_1-8B-Instruct_fp8_nvfp4_hf
test_e2e.py::test_ptp_star_attention_example[Llama3.1-8B-BF16-llama-3.1-model/Meta-Llama-3.1-8B]
test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-BF16-llama-3.1-model/Meta-Llama-3.1-8B]
test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-NVFP4-nvfp4-quantized/Meta-Llama-3.1-8B]
test_e2e.py::test_ptp_quickstart_advanced[GPT-OSS-20B-gpt_oss/gpt-oss-20b]
test_e2e.py::test_ptp_quickstart_advanced[GPT-OSS-120B-gpt_oss/gpt-oss-120b]
test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-bf16-instruct-llama-3.1-model/Llama-3.1-8B-Instruct]
test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP8-llama-3.1-model/Llama-3.1-8B-Instruct-FP8]
test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1-FP8-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1-FP8]
test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-NVFP4-nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1]
test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-FP8-Mixtral-8x7B-Instruct-v0.1-fp8]
test_e2e.py::test_ptp_quickstart_advanced_eagle3[Llama-3.1-8b-Instruct-llama-3.1-model/Llama-3.1-8B-Instruct-EAGLE3-LLaMA3.1-Instruct-8B]
test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP4-modelopt-hf-model-hub/Llama-3.1-8B-Instruct-fp4]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-8b-fp8-Qwen3/nvidia-Qwen3-8B-FP8]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-8b-nvfp4-Qwen3/nvidia-Qwen3-8B-NVFP4]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-8B-bf16-Qwen3/Qwen3-8B]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-14b-fp8-Qwen3/nvidia-Qwen3-14B-FP8]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-14b-nvfp4-Qwen3/nvidia-Qwen3-14B-NVFP4]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-14B-bf16-Qwen3/Qwen3-14B]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-32B-bf16-Qwen3/Qwen3-32B]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-32b-nvfp4-Qwen3/nvidia-Qwen3-32B-NVFP4]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B-Qwen3/Qwen3-30B-A3B]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf]
test_e2e.py::test_ptp_quickstart_advanced[Phi4-Reasoning-Plus-fp8-nvidia-Phi-4-reasoning-plus-FP8]
test_e2e.py::test_ptp_quickstart_advanced[Phi4-Reasoning-Plus-nvfp4-nvidia-Phi-4-reasoning-plus-NVFP4]
test_e2e.py::test_ptp_quickstart_advanced[Phi-4-reasoning-plus-bf16-Phi-4-reasoning-plus]
test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP8-modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp8]
test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP4-modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp4]
test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1-BF16-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1]
test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-BF16-Mixtral-8x7B-Instruct-v0.1]
test_e2e.py::test_ptp_quickstart_advanced[Mistral-Nemo-12b-Base-Mistral-Nemo-Base-2407]
test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-70B-FP8-llama-3.1-model/Llama-3.1-70B-Instruct-FP8]
test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1.5-FP8-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1_5-FP8]
test_e2e.py::test_ptp_quickstart_advanced[Llama-4-Scout-17B-16E-FP4-llama4-models/Llama-4-Scout-17B-16E-Instruct-FP4]
test_e2e.py::test_ptp_quickstart_advanced[DeepSeek-R1-Distill-Qwen-32B-DeepSeek-R1/DeepSeek-R1-Distill-Qwen-32B]
test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Nano-9B-v2-nvfp4-NVIDIA-Nemotron-Nano-9B-v2-NVFP4]
test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image]
test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-audio]
test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image_audio]
test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp4-multimodals/Phi-4-multimodal-instruct-FP4-image]
test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp4-multimodals/Phi-4-multimodal-instruct-FP4-audio]
test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp4-multimodals/Phi-4-multimodal-instruct-FP4-image_audio]
test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp8-multimodals/Phi-4-multimodal-instruct-FP8-image]
test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp8-multimodals/Phi-4-multimodal-instruct-FP8-audio]
test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp8-multimodals/Phi-4-multimodal-instruct-FP8-image_audio]
test_e2e.py::test_ptp_quickstart_advanced_eagle3[GPT-OSS-120B-Eagle3-gpt_oss/gpt-oss-120b-gpt_oss/gpt-oss-120b-Eagle3]

accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_llm_sampler
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_eagle3[sampler_async_worker=False-eagle3_one_model=False-overlap_scheduler=False]
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_eagle3[sampler_async_worker=False-eagle3_one_model=True-overlap_scheduler=True]
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_eagle3[sampler_async_worker=True-eagle3_one_model=True-overlap_scheduler=True]
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_ngram

accuracy/test_llm_api_pytorch_multimodal.py::TestNVILA_8B::test_auto_dtype
accuracy/test_llm_api_pytorch_multimodal.py::TestVILA1_5_3B::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4
accuracy/test_llm_api_pytorch_multimodal.py::TestQwen2_5_VL_7B::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=False]
accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=True]
accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_fp8
accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_fp4
accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype
Loading