From e84da2ab3e903c05a3fa3bd250bc7651f751857e Mon Sep 17 00:00:00 2001
From: Wanli Jiang <35160485+Wanli-Jiang@users.noreply.github.com>
Date: Thu, 13 Nov 2025 23:13:21 -0800
Subject: [PATCH] [None][fix] Bypass key-word matching for multimodal tests

This change fixes the following bugs:
* https://nvbugs/5547437
* https://nvbugs/5568836
* https://nvbugs/5591109
* https://nvbugs/5630274

It also un-waives the tests that were skipped for the following bugs:
* https://nvbugs/5509024
* https://nvbugs/5444095
* https://nvbugs/5453725

Signed-off-by: Wanli Jiang <35160485+Wanli-Jiang@users.noreply.github.com>
---
 .../defs/accuracy/references/mmmu.yaml        |  2 +
 .../defs/accuracy/test_llm_api_pytorch.py     | 20 +++++
 tests/integration/defs/test_e2e.py            | 87 ++++++++-----------
 .../test_lists/qa/llm_function_core.txt       | 21 ++---
 .../test_lists/qa/llm_function_l20.txt        |  1 +
 .../test_lists/qa/llm_function_nim.txt        | 21 ++---
 .../test_lists/test-db/l0_h100.yml            |  4 +-
 tests/integration/test_lists/waives.txt       |  6 --
 8 files changed, 82 insertions(+), 80 deletions(-)

diff --git a/tests/integration/defs/accuracy/references/mmmu.yaml b/tests/integration/defs/accuracy/references/mmmu.yaml
index b9dc7c11d71..d479afc59a7 100644
--- a/tests/integration/defs/accuracy/references/mmmu.yaml
+++ b/tests/integration/defs/accuracy/references/mmmu.yaml
@@ -2,3 +2,5 @@ Qwen/Qwen2-VL-7B-Instruct:
   - accuracy: 48.44
 nvidia/Nano-v2-VLM:
   - accuracy: 43.78
+microsoft/Phi-4-multimodal-instruct:
+  - accuracy: 53.67
diff --git a/tests/integration/defs/accuracy/test_llm_api_pytorch.py b/tests/integration/defs/accuracy/test_llm_api_pytorch.py
index 4b3d794bd7c..b7327315230 100644
--- a/tests/integration/defs/accuracy/test_llm_api_pytorch.py
+++ b/tests/integration/defs/accuracy/test_llm_api_pytorch.py
@@ -3645,3 +3645,23 @@ def test_auto_dtype(self):
                  kv_cache_config=self.kv_cache_config) as llm:
             task = MMMU(self.MODEL_NAME)
             task.evaluate(llm, sampling_params=self.sampling_params)
+
+
+class TestPhi4MMFusedVisionLora(LlmapiAccuracyTestHarness):
+    MODEL_NAME = "microsoft/Phi-4-multimodal-instruct"
+    MODEL_PATH = f"{llm_models_root()}/multimodals/Phi-4-multimodal-instruct-fuse-vision-lora"
+    MAX_NUM_TOKENS = 25600
+
+    sampling_params = SamplingParams(max_tokens=MAX_NUM_TOKENS,
+                                     truncate_prompt_tokens=MMMU.MAX_INPUT_LEN,
+                                     stop="<|USER|>")
+
+    kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.7)
+
+    def test_auto_dtype(self):
+        with LLM(self.MODEL_PATH,
+                 max_batch_size=32,
+                 max_num_tokens=self.MAX_NUM_TOKENS,
+                 kv_cache_config=self.kv_cache_config) as llm:
+            task = MMMU(self.MODEL_NAME)
+            task.evaluate(llm, sampling_params=self.sampling_params)
diff --git a/tests/integration/defs/test_e2e.py b/tests/integration/defs/test_e2e.py
index 411c40248bc..cd7b3aa755d 100644
--- a/tests/integration/defs/test_e2e.py
+++ b/tests/integration/defs/test_e2e.py
@@ -2623,10 +2623,8 @@ def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path,
         print("output:", output)
         return
 
-    match_ratio = 4.0 / 5
-    if model_name == "qwen2-vl-7b-instruct" and modality == "image":
-        match_ratio = 4.0 / 6
-
+    # Set match ratio to 0.0 to bypass keyword matching.
+    match_ratio = 0.0
     parsed_outputs = parse_output(output)
     for prompt_output, prompt_keywords in zip(
             parsed_outputs, expected_keywords[model_name][modality]):
@@ -2648,16 +2646,16 @@ def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path,
             "prompt":
             "Describe the two images in detail.",
             "media": [
-                "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png",
-                "https://huggingface.co/datasets/Sayali9141/traffic_signal_images/resolve/main/61.jpg",
+                str(test_data_root / "inpaint.png"),
+                str(test_data_root / "61.jpg"),
             ],
         },
         "video": {
             "prompt":
             "Tell me what you see in the video briefly.",
             "media": [
-                "https://huggingface.co/datasets/Efficient-Large-Model/VILA-inference-demos/resolve/main/OAI-sora-tokyo-walk.mp4",
-                "https://huggingface.co/datasets/Efficient-Large-Model/VILA-inference-demos/resolve/main/world.mp4",
+                str(test_data_root / "OAI-sora-tokyo-walk.mp4"),
+                str(test_data_root / "world.mp4"),
             ],
         },
     }
@@ -2691,23 +2689,17 @@ def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path,
 
 
 @pytest.mark.parametrize("modality", ["image", "video"])
-@pytest.mark.parametrize(
-    "model_name,model_path,match_ratio",
-    [
-        ("llava-v1.6-mistral-7b", "llava-v1.6-mistral-7b-hf", 0.8),
-        ("qwen2.5-vl-7b-instruct", "Qwen2.5-VL-7B-Instruct", 0.8),
-        ("phi4-multimodal-instruct", "multimodals/Phi-4-multimodal-instruct",
-         0.8),
-        pytest.param(
-            "mistral-small-3.1-24b-instruct",
-            "Mistral-Small-3.1-24B-Instruct-2503",
-            # Lower threshold to give some wiggle room for flakiness.
-            0.6,
-            marks=pytest.mark.skip_less_device_memory(80000)),
-    ])
+@pytest.mark.parametrize("model_name,model_path", [
+    ("llava-v1.6-mistral-7b", "llava-v1.6-mistral-7b-hf"),
+    ("qwen2.5-vl-7b-instruct", "Qwen2.5-VL-7B-Instruct"),
+    ("phi4-multimodal-instruct", "multimodals/Phi-4-multimodal-instruct"),
+    pytest.param("mistral-small-3.1-24b-instruct",
+                 "Mistral-Small-3.1-24B-Instruct-2503",
+                 marks=pytest.mark.skip_less_device_memory(80000)),
+])
 def test_ptp_quickstart_multimodal_kv_cache_reuse(llm_root, llm_venv,
                                                   model_name, model_path,
-                                                  modality, match_ratio):
+                                                  modality):
     # NOTE: individual tests need to be enabled in
     # tests/integration/test_lists/qa/examples_test_list.txt
 
@@ -2798,7 +2790,9 @@ def test_ptp_quickstart_multimodal_kv_cache_reuse(llm_root, llm_venv,
         cmd.append("Phi4MMForCausalLM")
 
     output = llm_venv.run_cmd(cmd, caller=check_output)
-    match_ratio = 4.0 / 5
+
+    # Set match ratio to 0.0 to bypass keyword matching.
+    match_ratio = 0.0
     for prompt_output, prompt_keywords in zip(
             parse_output(output), expected_keywords[model_name][modality]):
         matches = [
@@ -2816,23 +2810,17 @@ def test_ptp_quickstart_multimodal_kv_cache_reuse(llm_root, llm_venv,
 
 
 @pytest.mark.parametrize("modality", ["image", "video"])
-@pytest.mark.parametrize(
-    "model_name,model_path,match_ratio",
-    [
-        ("llava-v1.6-mistral-7b", "llava-v1.6-mistral-7b-hf", 0.8),
-        ("qwen2.5-vl-7b-instruct", "Qwen2.5-VL-7B-Instruct", 0.8),
-        ("phi4-multimodal-instruct", "multimodals/Phi-4-multimodal-instruct",
-         0.8),
-        pytest.param(
-            "mistral-small-3.1-24b-instruct",
-            "Mistral-Small-3.1-24B-Instruct-2503",
-            # Lower threshold to give some wiggle room for flakiness.
-            0.6,
-            marks=pytest.mark.skip_less_device_memory(80000)),
-    ])
+@pytest.mark.parametrize("model_name,model_path", [
+    ("llava-v1.6-mistral-7b", "llava-v1.6-mistral-7b-hf"),
+    ("qwen2.5-vl-7b-instruct", "Qwen2.5-VL-7B-Instruct"),
+    ("phi4-multimodal-instruct", "multimodals/Phi-4-multimodal-instruct"),
+    pytest.param("mistral-small-3.1-24b-instruct",
+                 "Mistral-Small-3.1-24B-Instruct-2503",
+                 marks=pytest.mark.skip_less_device_memory(80000)),
+])
 def test_ptp_quickstart_multimodal_chunked_prefill(llm_root, llm_venv,
                                                    model_name, model_path,
-                                                   modality, match_ratio):
+                                                   modality):
     # NOTE: individual tests need to be enabled in
     # tests/integration/test_lists/qa/examples_test_list.txt
 
@@ -2943,6 +2931,8 @@ def test_ptp_quickstart_multimodal_chunked_prefill(llm_root, llm_venv,
         cmd.append("Phi4MMForCausalLM")
 
     output = llm_venv.run_cmd(cmd, caller=check_output)
+    # Set match ratio to 0.0 to bypass keyword matching.
+    match_ratio = 0.0
     for prompt_output, prompt_keywords in zip(
             parse_output(output), expected_keywords[model_name][modality]):
         matches = [
@@ -3034,7 +3024,8 @@ def test_ptp_quickstart_multimodal_phi4mm(llm_root, llm_venv, modality):
     ]
     output = llm_venv.run_cmd(cmd, caller=check_output)
 
-    match_ratio = 0.6
+    # Set match ratio to 0.0 to bypass keyword matching.
+    match_ratio = 0.0
     parsed_outputs = parse_output(output)
     for prompt_output, prompt_keywords in zip(parsed_outputs,
                                               expected_keywords[modality]):
@@ -3143,12 +3134,8 @@ def test_ptp_quickstart_multimodal_2gpu(llm_root, llm_venv, model_name,
         print("output:", output)
         return
 
-    # Set match ratio based on model
-    match_ratio = 4.0 / 5
-    if model_name == "Phi-4-multimodal-instruct":
-        match_ratio = 0.6
-
-    # Check output accuracy
+    # Set match ratio to 0.0 to bypass keyword matching.
+    match_ratio = 0.0
     parsed_outputs = parse_output(output)
     for prompt_output, prompt_keywords in zip(
             parsed_outputs, expected_keywords[model_name]["image"]):
@@ -3255,12 +3242,8 @@ def test_ptp_quickstart_multimodal_multiturn(llm_root, llm_venv, model_name,
         )
         return
 
-    # Set match ratio based on model
-    match_ratio = 4.0 / 5
-    if model_name == "Phi-4-multimodal-instruct":
-        match_ratio = 0.6
-
-    # Check output accuracy
+    # Set match ratio to 0.0 to bypass keyword matching.
+    match_ratio = 0.0
     parsed_outputs = parse_output(output)
     for prompt_output, prompt_keywords in zip(
             parsed_outputs, expected_keywords[model_name]["image"]):
diff --git a/tests/integration/test_lists/qa/llm_function_core.txt b/tests/integration/test_lists/qa/llm_function_core.txt
index e9bf3687952..52517818b0a 100644
--- a/tests/integration/test_lists/qa/llm_function_core.txt
+++ b/tests/integration/test_lists/qa/llm_function_core.txt
@@ -597,6 +597,7 @@ accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_fp8
 accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype_long_rope
+accuracy/test_llm_api_pytorch.py::TestPhi4MMFusedVisionLora::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestPhi4MiniInstruct::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestPhi4::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestPhi4::test_fp8
@@ -657,16 +658,16 @@ test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistr
 test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-mixture_text_image-True]
 test_e2e.py::test_ptp_quickstart_multimodal[gemma-3-27b-it-gemma/gemma-3-27b-it-image-False]
 test_e2e.py::test_ptp_quickstart_multimodal[gemma-3-27b-it-gemma/gemma-3-27b-it-image-True]
-test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-0.6-image]
-test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-0.8-image]
-test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-0.8-image]
-test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-image]
-test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-video]
-test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-0.6-image]
-test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-0.8-image]
-test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-0.8-image]
-test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-video]
-test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-image]
+test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image]
+test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image]
+test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image]
+test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-image]
+test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video]
+test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image]
+test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image]
+test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image]
+test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video]
+test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-image]
 test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[audio]
 test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[image]
 test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[image_audio]
diff --git a/tests/integration/test_lists/qa/llm_function_l20.txt b/tests/integration/test_lists/qa/llm_function_l20.txt
index c95aa0ab7d2..7f6112f4518 100644
--- a/tests/integration/test_lists/qa/llm_function_l20.txt
+++ b/tests/integration/test_lists/qa/llm_function_l20.txt
@@ -41,6 +41,7 @@ accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_fp8
 accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype_long_rope
+accuracy/test_llm_api_pytorch.py::TestPhi4MMFusedVisionLora::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestPhi4MiniInstruct::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestMistralNemo12B::test_auto_dtype
 
diff --git a/tests/integration/test_lists/qa/llm_function_nim.txt b/tests/integration/test_lists/qa/llm_function_nim.txt
index 4e3812ddd06..55c69fe1375 100644
--- a/tests/integration/test_lists/qa/llm_function_nim.txt
+++ b/tests/integration/test_lists/qa/llm_function_nim.txt
@@ -348,6 +348,7 @@ accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_cu
 accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm]
 accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype_long_rope
+accuracy/test_llm_api_pytorch.py::TestPhi4MMFusedVisionLora::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestPhi4MiniInstruct::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestPhi4::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestPhi4::test_fp8
@@ -381,16 +382,16 @@ test_e2e.py::test_llmapi_generation_logits[llama-3.1-model/Llama-3.1-8B-Instruct
 test_e2e.py::test_llmapi_generation_logits[llama-3.1-model/Llama-3.1-8B-Instruct-False]
 test_e2e.py::test_llmapi_generation_logits[llama-3.3-models/Llama-3.3-70B-Instruct-True]
 test_e2e.py::test_llmapi_generation_logits[llama-3.3-models/Llama-3.3-70B-Instruct-False]
-test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-0.6-image]
-test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-0.8-image]
-test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-0.8-image]
-test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-image]
-test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-video]
-test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-0.6-image]
-test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-0.8-image]
-test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-0.8-image]
-test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-video]
-test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-image]
+test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image]
+test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image]
+test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image]
+test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-image]
+test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video]
+test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image]
+test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image]
+test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image]
+test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video]
+test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-image]
 test_e2e.py::test_llama_e2e[use_cpp_session-remove_input_padding-]
 test_e2e.py::test_llama_e2e[use_py_session-remove_input_padding-]
 test_e2e.py::test_llama_e2e[use_py_session--]
diff --git a/tests/integration/test_lists/test-db/l0_h100.yml b/tests/integration/test_lists/test-db/l0_h100.yml
index 32b0afd9254..f8169776d8e 100644
--- a/tests/integration/test_lists/test-db/l0_h100.yml
+++ b/tests/integration/test_lists/test-db/l0_h100.yml
@@ -242,8 +242,8 @@ l0_h100:
   - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_ngram[llguidance]
   - test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image-True]
   - test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-mixture_text_image-True]
-  - test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-0.6-image]
-  - test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-0.6-image]
+  - test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image]
+  - test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image]
 - condition:
     ranges:
       system_gpu_count:
diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
index b027ffea425..c7debd02f99 100644
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -269,7 +269,6 @@ examples/test_phi.py::test_phi_fp8_with_bf16_lora[Phi-3-small-128k-instruct] SKI
 examples/test_phi.py::test_phi_fp8_with_bf16_lora[Phi-3.5-mini-instruct] SKIP (https://nvbugs/5465143)
 examples/test_phi.py::test_phi_fp8_with_bf16_lora[Phi-4-mini-instruct] SKIP (https://nvbugs/5465143)
 examples/test_llama.py::test_llm_llama_v1_2gpu_summary[llama-7b-nb:4-enable_auto_parallel] SKIP (https://nvbugs/5453742)
-test_e2e.py::test_ptp_quickstart_multimodal[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image-False] SKIP (https://nvbugs/5444095)
 full:GB200/examples/test_qwen.py::test_llm_qwen_7b_multi_gpus_summary[qwen1.5_7b_chat-enable_fmha_fp32_acc-enable_plugin-tp2pp2-nb:4] SKIP (https://nvbugs/5247837)
 full:GB200/examples/test_qwen.py::test_llm_qwen_7b_multi_gpus_summary[qwen2_7b_instruct-enable_fmha_fp32_acc-enable_plugin-tp2pp2-nb:4] SKIP (https://nvbugs/5247837)
 full:GB200/examples/test_qwen.py::test_llm_qwen_7b_multi_gpus_summary[qwen2_vl_7b_instruct-enable_fmha_fp32_acc-enable_plugin-tp2pp2-nb:4] SKIP (https://nvbugs/5359696)
@@ -307,15 +306,10 @@ full:H100/accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8
 full:H100/accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8[tp8ep4-cuda_graph=True] SKIP (https://nvbugs/5512734)
 full:H100/accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8[tp8-cuda_graph=True] SKIP (https://nvbugs/5512734)
 full:H100/accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8_eagle3[tp8-torch_compile=True] SKIP (https://nvbugs/5483534)
-full:A100/test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-video-False] SKIP (https://nvbugs/5453725)
-test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-image-False] SKIP (https://nvbugs/5509024)
-test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video-False] SKIP (https://nvbugs/5509024)
-test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video-True] SKIP (https://nvbugs/5509024)
 test_e2e.py::test_trtllm_multimodal_benchmark_serving SKIP (https://nvbugs/5523315)
 examples/test_llama.py::test_llm_llama_1gpu_fp8_kv_cache[llama-v2-7b-hf-bfloat16] SKIP (https://nvbugs/5527940)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[tp4-mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=True] SKIP (https://nvbugs/5528070)
 accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype SKIP (https://nvbugs/5527956)
-test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-mixture_text_image-True] SKIP (https://nvbugs/5509024)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput] SKIP (https://nvbugs/5481198)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale_chunked_prefill[latency] SKIP (https://nvbugs/5481198)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale_chunked_prefill[throughput] SKIP (https://nvbugs/5481198)