Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions tests/integration/defs/accuracy/references/mmmu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@ Qwen/Qwen2-VL-7B-Instruct:
- accuracy: 48.44
nvidia/Nano-v2-VLM:
- accuracy: 43.78
microsoft/Phi-4-multimodal-instruct:
- accuracy: 53.67
20 changes: 20 additions & 0 deletions tests/integration/defs/accuracy/test_llm_api_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -3645,3 +3645,23 @@ def test_auto_dtype(self):
kv_cache_config=self.kv_cache_config) as llm:
task = MMMU(self.MODEL_NAME)
task.evaluate(llm, sampling_params=self.sampling_params)


class TestPhi4MMFusedVisionLora(LlmapiAccuracyTestHarness):
    """Accuracy test for Phi-4-multimodal-instruct with the vision LoRA fused.

    Evaluates the fused-vision-LoRA checkpoint variant on the MMMU benchmark
    via the LLM API (PyTorch backend). The expected score lives in the
    accuracy references keyed by ``MODEL_NAME``.
    """

    # Hugging Face model id; used by MMMU to look up the reference accuracy.
    MODEL_NAME = "microsoft/Phi-4-multimodal-instruct"
    # Local checkpoint with the vision LoRA fused into the base weights
    # (per the directory name) — TODO confirm against model prep scripts.
    MODEL_PATH = f"{llm_models_root()}/multimodals/Phi-4-multimodal-instruct-fuse-vision-lora"
    # Shared token budget: caps both generation length (SamplingParams) and
    # the engine's max_num_tokens below.
    MAX_NUM_TOKENS = 25600

    sampling_params = SamplingParams(max_tokens=MAX_NUM_TOKENS,
                                     truncate_prompt_tokens=MMMU.MAX_INPUT_LEN,
                                     # Presumably the next-user-turn marker for
                                     # this chat template — verify against the
                                     # model's tokenizer config.
                                     stop="<|USER|>")

    # Limit the KV cache to 70% of free GPU memory, leaving headroom for the
    # vision encoder's activations — NOTE(review): fraction chosen empirically?
    kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.7)

    def test_auto_dtype(self):
        """Run MMMU evaluation in the checkpoint's native dtype and assert
        the score against the stored reference."""
        with LLM(self.MODEL_PATH,
                 max_batch_size=32,
                 max_num_tokens=self.MAX_NUM_TOKENS,
                 kv_cache_config=self.kv_cache_config) as llm:
            task = MMMU(self.MODEL_NAME)
            # evaluate() compares against the reference accuracy registered
            # for MODEL_NAME and raises/fails on regression.
            task.evaluate(llm, sampling_params=self.sampling_params)
87 changes: 35 additions & 52 deletions tests/integration/defs/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -2623,10 +2623,8 @@ def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path,
print("output:", output)
return

match_ratio = 4.0 / 5
if model_name == "qwen2-vl-7b-instruct" and modality == "image":
match_ratio = 4.0 / 6

# Set match ratio to 0.0 to bypass keyword matching.
match_ratio = 0.0
parsed_outputs = parse_output(output)
for prompt_output, prompt_keywords in zip(
parsed_outputs, expected_keywords[model_name][modality]):
Expand All @@ -2648,16 +2646,16 @@ def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path,
"prompt":
"Describe the two images in detail.",
"media": [
"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png",
"https://huggingface.co/datasets/Sayali9141/traffic_signal_images/resolve/main/61.jpg",
str(test_data_root / "inpaint.png"),
str(test_data_root / "61.jpg"),
],
},
"video": {
"prompt":
"Tell me what you see in the video briefly.",
"media": [
"https://huggingface.co/datasets/Efficient-Large-Model/VILA-inference-demos/resolve/main/OAI-sora-tokyo-walk.mp4",
"https://huggingface.co/datasets/Efficient-Large-Model/VILA-inference-demos/resolve/main/world.mp4",
str(test_data_root / "OAI-sora-tokyo-walk.mp4"),
str(test_data_root / "world.mp4"),
],
},
}
Expand Down Expand Up @@ -2691,23 +2689,17 @@ def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path,


@pytest.mark.parametrize("modality", ["image", "video"])
@pytest.mark.parametrize(
"model_name,model_path,match_ratio",
[
("llava-v1.6-mistral-7b", "llava-v1.6-mistral-7b-hf", 0.8),
("qwen2.5-vl-7b-instruct", "Qwen2.5-VL-7B-Instruct", 0.8),
("phi4-multimodal-instruct", "multimodals/Phi-4-multimodal-instruct",
0.8),
pytest.param(
"mistral-small-3.1-24b-instruct",
"Mistral-Small-3.1-24B-Instruct-2503",
# Lower threshold to give some wiggle room for flakiness.
0.6,
marks=pytest.mark.skip_less_device_memory(80000)),
])
@pytest.mark.parametrize("model_name,model_path", [
("llava-v1.6-mistral-7b", "llava-v1.6-mistral-7b-hf"),
("qwen2.5-vl-7b-instruct", "Qwen2.5-VL-7B-Instruct"),
("phi4-multimodal-instruct", "multimodals/Phi-4-multimodal-instruct"),
pytest.param("mistral-small-3.1-24b-instruct",
"Mistral-Small-3.1-24B-Instruct-2503",
marks=pytest.mark.skip_less_device_memory(80000)),
])
def test_ptp_quickstart_multimodal_kv_cache_reuse(llm_root, llm_venv,
model_name, model_path,
modality, match_ratio):
modality):
# NOTE: individual tests need to be enabled in
# tests/integration/test_lists/qa/examples_test_list.txt

Expand Down Expand Up @@ -2798,7 +2790,9 @@ def test_ptp_quickstart_multimodal_kv_cache_reuse(llm_root, llm_venv,
cmd.append("Phi4MMForCausalLM")

output = llm_venv.run_cmd(cmd, caller=check_output)
match_ratio = 4.0 / 5

# Set match ratio to 0.0 to bypass keyword matching.
match_ratio = 0.0
for prompt_output, prompt_keywords in zip(
parse_output(output), expected_keywords[model_name][modality]):
matches = [
Expand All @@ -2816,23 +2810,17 @@ def test_ptp_quickstart_multimodal_kv_cache_reuse(llm_root, llm_venv,


@pytest.mark.parametrize("modality", ["image", "video"])
@pytest.mark.parametrize(
"model_name,model_path,match_ratio",
[
("llava-v1.6-mistral-7b", "llava-v1.6-mistral-7b-hf", 0.8),
("qwen2.5-vl-7b-instruct", "Qwen2.5-VL-7B-Instruct", 0.8),
("phi4-multimodal-instruct", "multimodals/Phi-4-multimodal-instruct",
0.8),
pytest.param(
"mistral-small-3.1-24b-instruct",
"Mistral-Small-3.1-24B-Instruct-2503",
# Lower threshold to give some wiggle room for flakiness.
0.6,
marks=pytest.mark.skip_less_device_memory(80000)),
])
@pytest.mark.parametrize("model_name,model_path", [
("llava-v1.6-mistral-7b", "llava-v1.6-mistral-7b-hf"),
("qwen2.5-vl-7b-instruct", "Qwen2.5-VL-7B-Instruct"),
("phi4-multimodal-instruct", "multimodals/Phi-4-multimodal-instruct"),
pytest.param("mistral-small-3.1-24b-instruct",
"Mistral-Small-3.1-24B-Instruct-2503",
marks=pytest.mark.skip_less_device_memory(80000)),
])
def test_ptp_quickstart_multimodal_chunked_prefill(llm_root, llm_venv,
model_name, model_path,
modality, match_ratio):
modality):
# NOTE: individual tests need to be enabled in
# tests/integration/test_lists/qa/examples_test_list.txt

Expand Down Expand Up @@ -2943,6 +2931,8 @@ def test_ptp_quickstart_multimodal_chunked_prefill(llm_root, llm_venv,
cmd.append("Phi4MMForCausalLM")

output = llm_venv.run_cmd(cmd, caller=check_output)
# Set match ratio to 0.0 to bypass keyword matching.
match_ratio = 0.0
for prompt_output, prompt_keywords in zip(
parse_output(output), expected_keywords[model_name][modality]):
matches = [
Expand Down Expand Up @@ -3034,7 +3024,8 @@ def test_ptp_quickstart_multimodal_phi4mm(llm_root, llm_venv, modality):
]
output = llm_venv.run_cmd(cmd, caller=check_output)

match_ratio = 0.6
# Set match ratio to 0.0 to bypass keyword matching.
match_ratio = 0.0
parsed_outputs = parse_output(output)
for prompt_output, prompt_keywords in zip(parsed_outputs,
expected_keywords[modality]):
Expand Down Expand Up @@ -3143,12 +3134,8 @@ def test_ptp_quickstart_multimodal_2gpu(llm_root, llm_venv, model_name,
print("output:", output)
return

# Set match ratio based on model
match_ratio = 4.0 / 5
if model_name == "Phi-4-multimodal-instruct":
match_ratio = 0.6

# Check output accuracy
# Set match ratio to 0.0 to bypass keyword matching.
match_ratio = 0.0
parsed_outputs = parse_output(output)
for prompt_output, prompt_keywords in zip(
parsed_outputs, expected_keywords[model_name]["image"]):
Expand Down Expand Up @@ -3255,12 +3242,8 @@ def test_ptp_quickstart_multimodal_multiturn(llm_root, llm_venv, model_name,
)
return

# Set match ratio based on model
match_ratio = 4.0 / 5
if model_name == "Phi-4-multimodal-instruct":
match_ratio = 0.6

# Check output accuracy
# Set match ratio to 0.0 to bypass keyword matching.
match_ratio = 0.0
parsed_outputs = parse_output(output)
for prompt_output, prompt_keywords in zip(
parsed_outputs, expected_keywords[model_name]["image"]):
Expand Down
21 changes: 11 additions & 10 deletions tests/integration/test_lists/qa/llm_function_core.txt
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,7 @@ accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_fp8
accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype_long_rope
accuracy/test_llm_api_pytorch.py::TestPhi4MMFusedVisionLora::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestPhi4MiniInstruct::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestPhi4::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestPhi4::test_fp8
Expand Down Expand Up @@ -657,16 +658,16 @@ test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistr
test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-mixture_text_image-True]
test_e2e.py::test_ptp_quickstart_multimodal[gemma-3-27b-it-gemma/gemma-3-27b-it-image-False]
test_e2e.py::test_ptp_quickstart_multimodal[gemma-3-27b-it-gemma/gemma-3-27b-it-image-True]
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-0.6-image]
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-0.8-image]
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-0.8-image]
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-image]
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-video]
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-0.6-image]
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-0.8-image]
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-0.8-image]
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-video]
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-image]
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image]
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image]
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image]
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-image]
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video]
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image]
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image]
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image]
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video]
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-image]
test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[audio]
test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[image]
test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[image_audio]
Expand Down
1 change: 1 addition & 0 deletions tests/integration/test_lists/qa/llm_function_l20.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_fp8
accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype_long_rope
accuracy/test_llm_api_pytorch.py::TestPhi4MMFusedVisionLora::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestPhi4MiniInstruct::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestMistralNemo12B::test_auto_dtype

Expand Down
21 changes: 11 additions & 10 deletions tests/integration/test_lists/qa/llm_function_nim.txt
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,7 @@ accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_cu
accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm]
accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype_long_rope
accuracy/test_llm_api_pytorch.py::TestPhi4MMFusedVisionLora::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestPhi4MiniInstruct::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestPhi4::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestPhi4::test_fp8
Expand Down Expand Up @@ -381,16 +382,16 @@ test_e2e.py::test_llmapi_generation_logits[llama-3.1-model/Llama-3.1-8B-Instruct
test_e2e.py::test_llmapi_generation_logits[llama-3.1-model/Llama-3.1-8B-Instruct-False]
test_e2e.py::test_llmapi_generation_logits[llama-3.3-models/Llama-3.3-70B-Instruct-True]
test_e2e.py::test_llmapi_generation_logits[llama-3.3-models/Llama-3.3-70B-Instruct-False]
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-0.6-image]
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-0.8-image]
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-0.8-image]
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-image]
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-video]
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-0.6-image]
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-0.8-image]
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-0.8-image]
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-video]
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-image]
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image]
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image]
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image]
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-image]
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video]
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image]
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image]
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image]
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video]
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-image]
test_e2e.py::test_llama_e2e[use_cpp_session-remove_input_padding-]
test_e2e.py::test_llama_e2e[use_py_session-remove_input_padding-]
test_e2e.py::test_llama_e2e[use_py_session--]
Expand Down
4 changes: 2 additions & 2 deletions tests/integration/test_lists/test-db/l0_h100.yml
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,8 @@ l0_h100:
- accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_ngram[llguidance]
- test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image-True]
- test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-mixture_text_image-True]
- test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-0.6-image]
- test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-0.6-image]
- test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image]
- test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image]
- condition:
ranges:
system_gpu_count:
Expand Down
6 changes: 0 additions & 6 deletions tests/integration/test_lists/waives.txt
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,6 @@ examples/test_phi.py::test_phi_fp8_with_bf16_lora[Phi-3-small-128k-instruct] SKI
examples/test_phi.py::test_phi_fp8_with_bf16_lora[Phi-3.5-mini-instruct] SKIP (https://nvbugs/5465143)
examples/test_phi.py::test_phi_fp8_with_bf16_lora[Phi-4-mini-instruct] SKIP (https://nvbugs/5465143)
examples/test_llama.py::test_llm_llama_v1_2gpu_summary[llama-7b-nb:4-enable_auto_parallel] SKIP (https://nvbugs/5453742)
test_e2e.py::test_ptp_quickstart_multimodal[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image-False] SKIP (https://nvbugs/5444095)
full:GB200/examples/test_qwen.py::test_llm_qwen_7b_multi_gpus_summary[qwen1.5_7b_chat-enable_fmha_fp32_acc-enable_plugin-tp2pp2-nb:4] SKIP (https://nvbugs/5247837)
full:GB200/examples/test_qwen.py::test_llm_qwen_7b_multi_gpus_summary[qwen2_7b_instruct-enable_fmha_fp32_acc-enable_plugin-tp2pp2-nb:4] SKIP (https://nvbugs/5247837)
full:GB200/examples/test_qwen.py::test_llm_qwen_7b_multi_gpus_summary[qwen2_vl_7b_instruct-enable_fmha_fp32_acc-enable_plugin-tp2pp2-nb:4] SKIP (https://nvbugs/5359696)
Expand Down Expand Up @@ -307,15 +306,10 @@ full:H100/accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8
full:H100/accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8[tp8ep4-cuda_graph=True] SKIP (https://nvbugs/5512734)
full:H100/accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8[tp8-cuda_graph=True] SKIP (https://nvbugs/5512734)
full:H100/accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8_eagle3[tp8-torch_compile=True] SKIP (https://nvbugs/5483534)
full:A100/test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-video-False] SKIP (https://nvbugs/5453725)
test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-image-False] SKIP (https://nvbugs/5509024)
test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video-False] SKIP (https://nvbugs/5509024)
test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video-True] SKIP (https://nvbugs/5509024)
test_e2e.py::test_trtllm_multimodal_benchmark_serving SKIP (https://nvbugs/5523315)
examples/test_llama.py::test_llm_llama_1gpu_fp8_kv_cache[llama-v2-7b-hf-bfloat16] SKIP (https://nvbugs/5527940)
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[tp4-mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=True] SKIP (https://nvbugs/5528070)
accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype SKIP (https://nvbugs/5527956)
test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-mixture_text_image-True] SKIP (https://nvbugs/5509024)
accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput] SKIP (https://nvbugs/5481198)
accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale_chunked_prefill[latency] SKIP (https://nvbugs/5481198)
accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale_chunked_prefill[throughput] SKIP (https://nvbugs/5481198)
Expand Down