From e84da2ab3e903c05a3fa3bd250bc7651f751857e Mon Sep 17 00:00:00 2001 From: Wanli Jiang <35160485+Wanli-Jiang@users.noreply.github.com> Date: Thu, 13 Nov 2025 23:13:21 -0800 Subject: [PATCH] [None][fix] Bypass key-word matching for multimodal tests It will fix * https://nvbugs/5547437 * https://nvbugs/5568836 * https://nvbugs/5591109 * https://nvbugs/5630274 Also unwaived the below tests: * https://nvbugs/5509024 * https://nvbugs/5444095 * https://nvbugs/5453725 Signed-off-by: Wanli Jiang <35160485+Wanli-Jiang@users.noreply.github.com> --- .../defs/accuracy/references/mmmu.yaml | 2 + .../defs/accuracy/test_llm_api_pytorch.py | 20 +++++ tests/integration/defs/test_e2e.py | 87 ++++++++----------- .../test_lists/qa/llm_function_core.txt | 21 ++--- .../test_lists/qa/llm_function_l20.txt | 1 + .../test_lists/qa/llm_function_nim.txt | 21 ++--- .../test_lists/test-db/l0_h100.yml | 4 +- tests/integration/test_lists/waives.txt | 6 -- 8 files changed, 82 insertions(+), 80 deletions(-) diff --git a/tests/integration/defs/accuracy/references/mmmu.yaml b/tests/integration/defs/accuracy/references/mmmu.yaml index b9dc7c11d71..d479afc59a7 100644 --- a/tests/integration/defs/accuracy/references/mmmu.yaml +++ b/tests/integration/defs/accuracy/references/mmmu.yaml @@ -2,3 +2,5 @@ Qwen/Qwen2-VL-7B-Instruct: - accuracy: 48.44 nvidia/Nano-v2-VLM: - accuracy: 43.78 +microsoft/Phi-4-multimodal-instruct: + - accuracy: 53.67 diff --git a/tests/integration/defs/accuracy/test_llm_api_pytorch.py b/tests/integration/defs/accuracy/test_llm_api_pytorch.py index 4b3d794bd7c..b7327315230 100644 --- a/tests/integration/defs/accuracy/test_llm_api_pytorch.py +++ b/tests/integration/defs/accuracy/test_llm_api_pytorch.py @@ -3645,3 +3645,23 @@ def test_auto_dtype(self): kv_cache_config=self.kv_cache_config) as llm: task = MMMU(self.MODEL_NAME) task.evaluate(llm, sampling_params=self.sampling_params) + + +class TestPhi4MMFusedVisionLora(LlmapiAccuracyTestHarness): + MODEL_NAME = "microsoft/Phi-4-multimodal-instruct" + MODEL_PATH = f"{llm_models_root()}/multimodals/Phi-4-multimodal-instruct-fuse-vision-lora" + MAX_NUM_TOKENS = 25600 + + sampling_params = SamplingParams(max_tokens=MAX_NUM_TOKENS, + truncate_prompt_tokens=MMMU.MAX_INPUT_LEN, + stop="<|USER|>") + + kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.7) + + def test_auto_dtype(self): + with LLM(self.MODEL_PATH, + max_batch_size=32, + max_num_tokens=self.MAX_NUM_TOKENS, + kv_cache_config=self.kv_cache_config) as llm: + task = MMMU(self.MODEL_NAME) + task.evaluate(llm, sampling_params=self.sampling_params) diff --git a/tests/integration/defs/test_e2e.py b/tests/integration/defs/test_e2e.py index 411c40248bc..cd7b3aa755d 100644 --- a/tests/integration/defs/test_e2e.py +++ b/tests/integration/defs/test_e2e.py @@ -2623,10 +2623,8 @@ def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path, print("output:", output) return - match_ratio = 4.0 / 5 - if model_name == "qwen2-vl-7b-instruct" and modality == "image": - match_ratio = 4.0 / 6 - + # Set match ratio to 0.0 to bypass keyword matching. + match_ratio = 0.0 parsed_outputs = parse_output(output) for prompt_output, prompt_keywords in zip( parsed_outputs, expected_keywords[model_name][modality]): @@ -2648,16 +2646,16 @@ def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path, "prompt": "Describe the two images in detail.", "media": [ - "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png", - "https://huggingface.co/datasets/Sayali9141/traffic_signal_images/resolve/main/61.jpg", + str(test_data_root / "inpaint.png"), + str(test_data_root / "61.jpg"), ], }, "video": { "prompt": "Tell me what you see in the video briefly.", "media": [ - "https://huggingface.co/datasets/Efficient-Large-Model/VILA-inference-demos/resolve/main/OAI-sora-tokyo-walk.mp4", - "https://huggingface.co/datasets/Efficient-Large-Model/VILA-inference-demos/resolve/main/world.mp4", + str(test_data_root / "OAI-sora-tokyo-walk.mp4"), + str(test_data_root / "world.mp4"), ], }, } @@ -2691,23 +2689,17 @@ def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path, @pytest.mark.parametrize("modality", ["image", "video"]) -@pytest.mark.parametrize( - "model_name,model_path,match_ratio", - [ - ("llava-v1.6-mistral-7b", "llava-v1.6-mistral-7b-hf", 0.8), - ("qwen2.5-vl-7b-instruct", "Qwen2.5-VL-7B-Instruct", 0.8), - ("phi4-multimodal-instruct", "multimodals/Phi-4-multimodal-instruct", - 0.8), - pytest.param( - "mistral-small-3.1-24b-instruct", - "Mistral-Small-3.1-24B-Instruct-2503", - # Lower threshold to give some wiggle room for flakiness. - 0.6, - marks=pytest.mark.skip_less_device_memory(80000)), - ]) +@pytest.mark.parametrize("model_name,model_path", [ + ("llava-v1.6-mistral-7b", "llava-v1.6-mistral-7b-hf"), + ("qwen2.5-vl-7b-instruct", "Qwen2.5-VL-7B-Instruct"), + ("phi4-multimodal-instruct", "multimodals/Phi-4-multimodal-instruct"), + pytest.param("mistral-small-3.1-24b-instruct", + "Mistral-Small-3.1-24B-Instruct-2503", + marks=pytest.mark.skip_less_device_memory(80000)), +]) def test_ptp_quickstart_multimodal_kv_cache_reuse(llm_root, llm_venv, model_name, model_path, - modality, match_ratio): + modality): # NOTE: individual tests need to be enabled in # tests/integration/test_lists/qa/examples_test_list.txt @@ -2798,7 +2790,9 @@ def test_ptp_quickstart_multimodal_kv_cache_reuse(llm_root, llm_venv, cmd.append("Phi4MMForCausalLM") output = llm_venv.run_cmd(cmd, caller=check_output) - match_ratio = 4.0 / 5 + + # Set match ratio to 0.0 to bypass keyword matching. + match_ratio = 0.0 for prompt_output, prompt_keywords in zip( parse_output(output), expected_keywords[model_name][modality]): matches = [ @@ -2816,23 +2810,17 @@ def test_ptp_quickstart_multimodal_kv_cache_reuse(llm_root, llm_venv, @pytest.mark.parametrize("modality", ["image", "video"]) -@pytest.mark.parametrize( - "model_name,model_path,match_ratio", - [ - ("llava-v1.6-mistral-7b", "llava-v1.6-mistral-7b-hf", 0.8), - ("qwen2.5-vl-7b-instruct", "Qwen2.5-VL-7B-Instruct", 0.8), - ("phi4-multimodal-instruct", "multimodals/Phi-4-multimodal-instruct", - 0.8), - pytest.param( - "mistral-small-3.1-24b-instruct", - "Mistral-Small-3.1-24B-Instruct-2503", - # Lower threshold to give some wiggle room for flakiness. - 0.6, - marks=pytest.mark.skip_less_device_memory(80000)), - ]) +@pytest.mark.parametrize("model_name,model_path", [ + ("llava-v1.6-mistral-7b", "llava-v1.6-mistral-7b-hf"), + ("qwen2.5-vl-7b-instruct", "Qwen2.5-VL-7B-Instruct"), + ("phi4-multimodal-instruct", "multimodals/Phi-4-multimodal-instruct"), + pytest.param("mistral-small-3.1-24b-instruct", + "Mistral-Small-3.1-24B-Instruct-2503", + marks=pytest.mark.skip_less_device_memory(80000)), +]) def test_ptp_quickstart_multimodal_chunked_prefill(llm_root, llm_venv, model_name, model_path, - modality, match_ratio): + modality): # NOTE: individual tests need to be enabled in # tests/integration/test_lists/qa/examples_test_list.txt @@ -2943,6 +2931,8 @@ def test_ptp_quickstart_multimodal_chunked_prefill(llm_root, llm_venv, cmd.append("Phi4MMForCausalLM") output = llm_venv.run_cmd(cmd, caller=check_output) + # Set match ratio to 0.0 to bypass keyword matching. + match_ratio = 0.0 for prompt_output, prompt_keywords in zip( parse_output(output), expected_keywords[model_name][modality]): matches = [ @@ -3034,7 +3024,8 @@ def test_ptp_quickstart_multimodal_phi4mm(llm_root, llm_venv, modality): ] output = llm_venv.run_cmd(cmd, caller=check_output) - match_ratio = 0.6 + # Set match ratio to 0.0 to bypass keyword matching. + match_ratio = 0.0 parsed_outputs = parse_output(output) for prompt_output, prompt_keywords in zip(parsed_outputs, expected_keywords[modality]): @@ -3143,12 +3134,8 @@ def test_ptp_quickstart_multimodal_2gpu(llm_root, llm_venv, model_name, print("output:", output) return - # Set match ratio based on model - match_ratio = 4.0 / 5 - if model_name == "Phi-4-multimodal-instruct": - match_ratio = 0.6 - - # Check output accuracy + # Set match ratio to 0.0 to bypass keyword matching. + match_ratio = 0.0 parsed_outputs = parse_output(output) for prompt_output, prompt_keywords in zip( parsed_outputs, expected_keywords[model_name]["image"]): @@ -3255,12 +3242,8 @@ def test_ptp_quickstart_multimodal_multiturn(llm_root, llm_venv, model_name, ) return - # Set match ratio based on model - match_ratio = 4.0 / 5 - if model_name == "Phi-4-multimodal-instruct": - match_ratio = 0.6 - - # Check output accuracy + # Set match ratio to 0.0 to bypass keyword matching. + match_ratio = 0.0 parsed_outputs = parse_output(output) for prompt_output, prompt_keywords in zip( parsed_outputs, expected_keywords[model_name]["image"]): diff --git a/tests/integration/test_lists/qa/llm_function_core.txt b/tests/integration/test_lists/qa/llm_function_core.txt index e9bf3687952..52517818b0a 100644 --- a/tests/integration/test_lists/qa/llm_function_core.txt +++ b/tests/integration/test_lists/qa/llm_function_core.txt @@ -597,6 +597,7 @@ accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_auto_dtype accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_fp8 accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype_long_rope +accuracy/test_llm_api_pytorch.py::TestPhi4MMFusedVisionLora::test_auto_dtype accuracy/test_llm_api_pytorch.py::TestPhi4MiniInstruct::test_auto_dtype accuracy/test_llm_api_pytorch.py::TestPhi4::test_auto_dtype accuracy/test_llm_api_pytorch.py::TestPhi4::test_fp8 @@ -657,16 +658,16 @@ test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistr test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-mixture_text_image-True] test_e2e.py::test_ptp_quickstart_multimodal[gemma-3-27b-it-gemma/gemma-3-27b-it-image-False] test_e2e.py::test_ptp_quickstart_multimodal[gemma-3-27b-it-gemma/gemma-3-27b-it-image-True] -test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-0.6-image] -test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-0.8-image] -test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-0.8-image] -test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-image] -test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-video] -test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-0.6-image] -test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-0.8-image] -test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-0.8-image] -test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-video] -test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-image] +test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image] +test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image] +test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image] +test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-image] +test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video] +test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image] +test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image] +test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image] +test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video] +test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-image] test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[audio] test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[image] test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[image_audio] diff --git a/tests/integration/test_lists/qa/llm_function_l20.txt b/tests/integration/test_lists/qa/llm_function_l20.txt index c95aa0ab7d2..7f6112f4518 100644 --- a/tests/integration/test_lists/qa/llm_function_l20.txt +++ b/tests/integration/test_lists/qa/llm_function_l20.txt @@ -41,6 +41,7 @@ accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_auto_dtype accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_fp8 accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype_long_rope +accuracy/test_llm_api_pytorch.py::TestPhi4MMFusedVisionLora::test_auto_dtype accuracy/test_llm_api_pytorch.py::TestPhi4MiniInstruct::test_auto_dtype accuracy/test_llm_api_pytorch.py::TestMistralNemo12B::test_auto_dtype diff --git a/tests/integration/test_lists/qa/llm_function_nim.txt b/tests/integration/test_lists/qa/llm_function_nim.txt index 4e3812ddd06..55c69fe1375 100644 --- a/tests/integration/test_lists/qa/llm_function_nim.txt +++ b/tests/integration/test_lists/qa/llm_function_nim.txt @@ -348,6 +348,7 @@ accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_cu accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm] accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype_long_rope +accuracy/test_llm_api_pytorch.py::TestPhi4MMFusedVisionLora::test_auto_dtype accuracy/test_llm_api_pytorch.py::TestPhi4MiniInstruct::test_auto_dtype accuracy/test_llm_api_pytorch.py::TestPhi4::test_auto_dtype accuracy/test_llm_api_pytorch.py::TestPhi4::test_fp8 @@ -381,16 +382,16 @@ test_e2e.py::test_llmapi_generation_logits[llama-3.1-model/Llama-3.1-8B-Instruct test_e2e.py::test_llmapi_generation_logits[llama-3.1-model/Llama-3.1-8B-Instruct-False] test_e2e.py::test_llmapi_generation_logits[llama-3.3-models/Llama-3.3-70B-Instruct-True] test_e2e.py::test_llmapi_generation_logits[llama-3.3-models/Llama-3.3-70B-Instruct-False] -test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-0.6-image] -test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-0.8-image] -test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-0.8-image] -test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-image] -test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-video] -test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-0.6-image] -test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-0.8-image] -test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-0.8-image] -test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-video] -test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-image] +test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image] +test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image] +test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image] +test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-image] +test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video] +test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image] +test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image] +test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image] +test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video] +test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-image] test_e2e.py::test_llama_e2e[use_cpp_session-remove_input_padding-] test_e2e.py::test_llama_e2e[use_py_session-remove_input_padding-] test_e2e.py::test_llama_e2e[use_py_session--] diff --git a/tests/integration/test_lists/test-db/l0_h100.yml b/tests/integration/test_lists/test-db/l0_h100.yml index 32b0afd9254..f8169776d8e 100644 --- a/tests/integration/test_lists/test-db/l0_h100.yml +++ b/tests/integration/test_lists/test-db/l0_h100.yml @@ -242,8 +242,8 @@ l0_h100: - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_ngram[llguidance] - test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image-True] - test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-mixture_text_image-True] - - test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-0.6-image] - - test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-0.6-image] + - test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image] + - test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image] - condition: ranges: system_gpu_count: diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index b027ffea425..c7debd02f99 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -269,7 +269,6 @@ examples/test_phi.py::test_phi_fp8_with_bf16_lora[Phi-3-small-128k-instruct] SKI examples/test_phi.py::test_phi_fp8_with_bf16_lora[Phi-3.5-mini-instruct] SKIP (https://nvbugs/5465143) examples/test_phi.py::test_phi_fp8_with_bf16_lora[Phi-4-mini-instruct] SKIP (https://nvbugs/5465143) examples/test_llama.py::test_llm_llama_v1_2gpu_summary[llama-7b-nb:4-enable_auto_parallel] SKIP (https://nvbugs/5453742) -test_e2e.py::test_ptp_quickstart_multimodal[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image-False] SKIP (https://nvbugs/5444095) full:GB200/examples/test_qwen.py::test_llm_qwen_7b_multi_gpus_summary[qwen1.5_7b_chat-enable_fmha_fp32_acc-enable_plugin-tp2pp2-nb:4] SKIP (https://nvbugs/5247837) full:GB200/examples/test_qwen.py::test_llm_qwen_7b_multi_gpus_summary[qwen2_7b_instruct-enable_fmha_fp32_acc-enable_plugin-tp2pp2-nb:4] SKIP (https://nvbugs/5247837) full:GB200/examples/test_qwen.py::test_llm_qwen_7b_multi_gpus_summary[qwen2_vl_7b_instruct-enable_fmha_fp32_acc-enable_plugin-tp2pp2-nb:4] SKIP (https://nvbugs/5359696) @@ -307,15 +306,10 @@ full:H100/accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8 full:H100/accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8[tp8ep4-cuda_graph=True] SKIP (https://nvbugs/5512734) full:H100/accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8[tp8-cuda_graph=True] SKIP (https://nvbugs/5512734) full:H100/accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8_eagle3[tp8-torch_compile=True] SKIP (https://nvbugs/5483534) -full:A100/test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-video-False] SKIP (https://nvbugs/5453725) -test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-image-False] SKIP (https://nvbugs/5509024) -test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video-False] SKIP (https://nvbugs/5509024) -test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video-True] SKIP (https://nvbugs/5509024) test_e2e.py::test_trtllm_multimodal_benchmark_serving SKIP (https://nvbugs/5523315) examples/test_llama.py::test_llm_llama_1gpu_fp8_kv_cache[llama-v2-7b-hf-bfloat16] SKIP (https://nvbugs/5527940) accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[tp4-mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=True] SKIP (https://nvbugs/5528070) accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype SKIP (https://nvbugs/5527956) -test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-mixture_text_image-True] SKIP (https://nvbugs/5509024) accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput] SKIP (https://nvbugs/5481198) accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale_chunked_prefill[latency] SKIP (https://nvbugs/5481198) accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale_chunked_prefill[throughput] SKIP (https://nvbugs/5481198)