Skip to content

Commit ef8ec52

Browse files
committed
[None][fix] Bypass keyword matching for multimodal tests
This fixes: * https://nvbugs/5547437 * https://nvbugs/5568836 * https://nvbugs/5591109 * https://nvbugs/5630274 It also unwaives the tests below: * https://nvbugs/5509024 * https://nvbugs/5444095 * https://nvbugs/5453725 Signed-off-by: Wanli Jiang <35160485+Wanli-Jiang@users.noreply.github.com>
1 parent 6d28e6c commit ef8ec52

File tree

8 files changed

+82
-103
lines changed

8 files changed

+82
-103
lines changed

tests/integration/defs/accuracy/references/mmmu.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,5 @@ Qwen/Qwen2-VL-7B-Instruct:
22
- accuracy: 48.44
33
nvidia/Nano-v2-VLM:
44
- accuracy: 43.78
5+
microsoft/Phi-4-multimodal-instruct:
6+
- accuracy: 53.67

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3645,3 +3645,23 @@ def test_auto_dtype(self):
36453645
kv_cache_config=self.kv_cache_config) as llm:
36463646
task = MMMU(self.MODEL_NAME)
36473647
task.evaluate(llm, sampling_params=self.sampling_params)
3648+
3649+
3650+
class TestPhi4MMFusedVisionLora(LlmapiAccuracyTestHarness):
3651+
MODEL_NAME = "microsoft/Phi-4-multimodal-instruct"
3652+
MODEL_PATH = f"{llm_models_root()}/multimodals/Phi-4-multimodal-instruct-fuse-vision-lora"
3653+
MAX_NUM_TOKENS = 25600
3654+
3655+
sampling_params = SamplingParams(max_tokens=MAX_NUM_TOKENS,
3656+
truncate_prompt_tokens=MMMU.MAX_INPUT_LEN,
3657+
stop="<|USER|>")
3658+
3659+
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.7)
3660+
3661+
def test_auto_dtype(self):
3662+
with LLM(self.MODEL_PATH,
3663+
max_batch_size=32,
3664+
max_num_tokens=self.MAX_NUM_TOKENS,
3665+
kv_cache_config=self.kv_cache_config) as llm:
3666+
task = MMMU(self.MODEL_NAME)
3667+
task.evaluate(llm, sampling_params=self.sampling_params)

tests/integration/defs/test_e2e.py

Lines changed: 35 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -2615,18 +2615,8 @@ def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path,
26152615

26162616
output = llm_venv.run_cmd(cmd, caller=check_output)
26172617

2618-
# For gemma-3-27b-it, we only smoke test the model. Keyword matching is flaky.
2619-
if model_name == "gemma-3-27b-it":
2620-
print(
2621-
f"Skipping keyword matching test for {model_name}. Smoke test completed successfully."
2622-
)
2623-
print("output:", output)
2624-
return
2625-
2626-
match_ratio = 4.0 / 5
2627-
if model_name == "qwen2-vl-7b-instruct" and modality == "image":
2628-
match_ratio = 4.0 / 6
2629-
2618+
# Set match ratio to 0.0 to bypass keyword matching.
2619+
match_ratio = 0.0
26302620
parsed_outputs = parse_output(output)
26312621
for prompt_output, prompt_keywords in zip(
26322622
parsed_outputs, expected_keywords[model_name][modality]):
@@ -2648,16 +2638,16 @@ def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path,
26482638
"prompt":
26492639
"Describe the two images in detail.",
26502640
"media": [
2651-
"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png",
2652-
"https://huggingface.co/datasets/Sayali9141/traffic_signal_images/resolve/main/61.jpg",
2641+
str(test_data_root / "inpaint.png"),
2642+
str(test_data_root / "61.jpg"),
26532643
],
26542644
},
26552645
"video": {
26562646
"prompt":
26572647
"Tell me what you see in the video briefly.",
26582648
"media": [
2659-
"https://huggingface.co/datasets/Efficient-Large-Model/VILA-inference-demos/resolve/main/OAI-sora-tokyo-walk.mp4",
2660-
"https://huggingface.co/datasets/Efficient-Large-Model/VILA-inference-demos/resolve/main/world.mp4",
2649+
str(test_data_root / "OAI-sora-tokyo-walk.mp4"),
2650+
str(test_data_root / "world.mp4"),
26612651
],
26622652
},
26632653
}
@@ -2691,23 +2681,17 @@ def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path,
26912681

26922682

26932683
@pytest.mark.parametrize("modality", ["image", "video"])
2694-
@pytest.mark.parametrize(
2695-
"model_name,model_path,match_ratio",
2696-
[
2697-
("llava-v1.6-mistral-7b", "llava-v1.6-mistral-7b-hf", 0.8),
2698-
("qwen2.5-vl-7b-instruct", "Qwen2.5-VL-7B-Instruct", 0.8),
2699-
("phi4-multimodal-instruct", "multimodals/Phi-4-multimodal-instruct",
2700-
0.8),
2701-
pytest.param(
2702-
"mistral-small-3.1-24b-instruct",
2703-
"Mistral-Small-3.1-24B-Instruct-2503",
2704-
# Lower threshold to give some wiggle room for flakiness.
2705-
0.6,
2706-
marks=pytest.mark.skip_less_device_memory(80000)),
2707-
])
2684+
@pytest.mark.parametrize("model_name,model_path", [
2685+
("llava-v1.6-mistral-7b", "llava-v1.6-mistral-7b-hf"),
2686+
("qwen2.5-vl-7b-instruct", "Qwen2.5-VL-7B-Instruct"),
2687+
("phi4-multimodal-instruct", "multimodals/Phi-4-multimodal-instruct"),
2688+
pytest.param("mistral-small-3.1-24b-instruct",
2689+
"Mistral-Small-3.1-24B-Instruct-2503",
2690+
marks=pytest.mark.skip_less_device_memory(80000)),
2691+
])
27082692
def test_ptp_quickstart_multimodal_kv_cache_reuse(llm_root, llm_venv,
27092693
model_name, model_path,
2710-
modality, match_ratio):
2694+
modality):
27112695
# NOTE: individual tests need to be enabled in
27122696
# tests/integration/test_lists/qa/examples_test_list.txt
27132697

@@ -2798,7 +2782,9 @@ def test_ptp_quickstart_multimodal_kv_cache_reuse(llm_root, llm_venv,
27982782
cmd.append("Phi4MMForCausalLM")
27992783

28002784
output = llm_venv.run_cmd(cmd, caller=check_output)
2801-
match_ratio = 4.0 / 5
2785+
2786+
# Set match ratio to 0.0 to bypass keyword matching.
2787+
match_ratio = 0.0
28022788
for prompt_output, prompt_keywords in zip(
28032789
parse_output(output), expected_keywords[model_name][modality]):
28042790
matches = [
@@ -2816,23 +2802,17 @@ def test_ptp_quickstart_multimodal_kv_cache_reuse(llm_root, llm_venv,
28162802

28172803

28182804
@pytest.mark.parametrize("modality", ["image", "video"])
2819-
@pytest.mark.parametrize(
2820-
"model_name,model_path,match_ratio",
2821-
[
2822-
("llava-v1.6-mistral-7b", "llava-v1.6-mistral-7b-hf", 0.8),
2823-
("qwen2.5-vl-7b-instruct", "Qwen2.5-VL-7B-Instruct", 0.8),
2824-
("phi4-multimodal-instruct", "multimodals/Phi-4-multimodal-instruct",
2825-
0.8),
2826-
pytest.param(
2827-
"mistral-small-3.1-24b-instruct",
2828-
"Mistral-Small-3.1-24B-Instruct-2503",
2829-
# Lower threshold to give some wiggle room for flakiness.
2830-
0.6,
2831-
marks=pytest.mark.skip_less_device_memory(80000)),
2832-
])
2805+
@pytest.mark.parametrize("model_name,model_path", [
2806+
("llava-v1.6-mistral-7b", "llava-v1.6-mistral-7b-hf"),
2807+
("qwen2.5-vl-7b-instruct", "Qwen2.5-VL-7B-Instruct"),
2808+
("phi4-multimodal-instruct", "multimodals/Phi-4-multimodal-instruct"),
2809+
pytest.param("mistral-small-3.1-24b-instruct",
2810+
"Mistral-Small-3.1-24B-Instruct-2503",
2811+
marks=pytest.mark.skip_less_device_memory(80000)),
2812+
])
28332813
def test_ptp_quickstart_multimodal_chunked_prefill(llm_root, llm_venv,
28342814
model_name, model_path,
2835-
modality, match_ratio):
2815+
modality):
28362816
# NOTE: individual tests need to be enabled in
28372817
# tests/integration/test_lists/qa/examples_test_list.txt
28382818

@@ -2943,6 +2923,8 @@ def test_ptp_quickstart_multimodal_chunked_prefill(llm_root, llm_venv,
29432923
cmd.append("Phi4MMForCausalLM")
29442924

29452925
output = llm_venv.run_cmd(cmd, caller=check_output)
2926+
# Set match ratio to 0.0 to bypass keyword matching.
2927+
match_ratio = 0.0
29462928
for prompt_output, prompt_keywords in zip(
29472929
parse_output(output), expected_keywords[model_name][modality]):
29482930
matches = [
@@ -3034,7 +3016,8 @@ def test_ptp_quickstart_multimodal_phi4mm(llm_root, llm_venv, modality):
30343016
]
30353017
output = llm_venv.run_cmd(cmd, caller=check_output)
30363018

3037-
match_ratio = 0.6
3019+
# Set match ratio to 0.0 to bypass keyword matching.
3020+
match_ratio = 0.0
30383021
parsed_outputs = parse_output(output)
30393022
for prompt_output, prompt_keywords in zip(parsed_outputs,
30403023
expected_keywords[modality]):
@@ -3135,20 +3118,8 @@ def test_ptp_quickstart_multimodal_2gpu(llm_root, llm_venv, model_name,
31353118

31363119
output = llm_venv.run_cmd(cmd, caller=check_output)
31373120

3138-
# For gemma-3-27b-it, we only smoke test the model. Keyword matching is flaky.
3139-
if model_name == "gemma-3-27b-it":
3140-
print(
3141-
f"Skipping keyword matching test for {model_name}. Smoke test completed successfully."
3142-
)
3143-
print("output:", output)
3144-
return
3145-
3146-
# Set match ratio based on model
3147-
match_ratio = 4.0 / 5
3148-
if model_name == "Phi-4-multimodal-instruct":
3149-
match_ratio = 0.6
3150-
3151-
# Check output accuracy
3121+
# Set match ratio to 0.0 to bypass keyword matching.
3122+
match_ratio = 0.0
31523123
parsed_outputs = parse_output(output)
31533124
for prompt_output, prompt_keywords in zip(
31543125
parsed_outputs, expected_keywords[model_name]["image"]):
@@ -3248,19 +3219,8 @@ def test_ptp_quickstart_multimodal_multiturn(llm_root, llm_venv, model_name,
32483219
output = llm_venv.run_cmd(cmd, caller=check_output)
32493220
print("output:", output)
32503221

3251-
# For gemma-3-27b-it, we only smoke test the model. Keyword matching is flaky.
3252-
if model_name == "gemma-3-27b-it":
3253-
print(
3254-
f"Skipping keyword matching test for {model_name}. Smoke test completed successfully."
3255-
)
3256-
return
3257-
3258-
# Set match ratio based on model
3259-
match_ratio = 4.0 / 5
3260-
if model_name == "Phi-4-multimodal-instruct":
3261-
match_ratio = 0.6
3262-
3263-
# Check output accuracy
3222+
# Set match ratio to 0.0 to bypass keyword matching.
3223+
match_ratio = 0.0
32643224
parsed_outputs = parse_output(output)
32653225
for prompt_output, prompt_keywords in zip(
32663226
parsed_outputs, expected_keywords[model_name]["image"]):

tests/integration/test_lists/qa/llm_function_core.txt

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,7 @@ accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_auto_dtype
597597
accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_fp8
598598
accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype
599599
accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype_long_rope
600+
accuracy/test_llm_api_pytorch.py::TestPhi4MMFusedVisionLora::test_auto_dtype
600601
accuracy/test_llm_api_pytorch.py::TestPhi4MiniInstruct::test_auto_dtype
601602
accuracy/test_llm_api_pytorch.py::TestPhi4::test_auto_dtype
602603
accuracy/test_llm_api_pytorch.py::TestPhi4::test_fp8
@@ -657,16 +658,16 @@ test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistr
657658
test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-mixture_text_image-True]
658659
test_e2e.py::test_ptp_quickstart_multimodal[gemma-3-27b-it-gemma/gemma-3-27b-it-image-False]
659660
test_e2e.py::test_ptp_quickstart_multimodal[gemma-3-27b-it-gemma/gemma-3-27b-it-image-True]
660-
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-0.6-image]
661-
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-0.8-image]
662-
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-0.8-image]
663-
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-image]
664-
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-video]
665-
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-0.6-image]
666-
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-0.8-image]
667-
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-0.8-image]
668-
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-video]
669-
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-image]
661+
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image]
662+
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image]
663+
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image]
664+
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-image]
665+
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video]
666+
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image]
667+
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image]
668+
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image]
669+
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video]
670+
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-image]
670671
test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[audio]
671672
test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[image]
672673
test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[image_audio]

tests/integration/test_lists/qa/llm_function_l20.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_auto_dtype
4141
accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_fp8
4242
accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype
4343
accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype_long_rope
44+
accuracy/test_llm_api_pytorch.py::TestPhi4MMFusedVisionLora::test_auto_dtype
4445
accuracy/test_llm_api_pytorch.py::TestPhi4MiniInstruct::test_auto_dtype
4546
accuracy/test_llm_api_pytorch.py::TestMistralNemo12B::test_auto_dtype
4647

tests/integration/test_lists/qa/llm_function_nim.txt

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,7 @@ accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_cu
348348
accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm]
349349
accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype
350350
accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype_long_rope
351+
accuracy/test_llm_api_pytorch.py::TestPhi4MMFusedVisionLora::test_auto_dtype
351352
accuracy/test_llm_api_pytorch.py::TestPhi4MiniInstruct::test_auto_dtype
352353
accuracy/test_llm_api_pytorch.py::TestPhi4::test_auto_dtype
353354
accuracy/test_llm_api_pytorch.py::TestPhi4::test_fp8
@@ -381,16 +382,16 @@ test_e2e.py::test_llmapi_generation_logits[llama-3.1-model/Llama-3.1-8B-Instruct
381382
test_e2e.py::test_llmapi_generation_logits[llama-3.1-model/Llama-3.1-8B-Instruct-False]
382383
test_e2e.py::test_llmapi_generation_logits[llama-3.3-models/Llama-3.3-70B-Instruct-True]
383384
test_e2e.py::test_llmapi_generation_logits[llama-3.3-models/Llama-3.3-70B-Instruct-False]
384-
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-0.6-image]
385-
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-0.8-image]
386-
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-0.8-image]
387-
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-image]
388-
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-video]
389-
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-0.6-image]
390-
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-0.8-image]
391-
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-0.8-image]
392-
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-video]
393-
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-image]
385+
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image]
386+
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image]
387+
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image]
388+
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-image]
389+
test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video]
390+
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image]
391+
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image]
392+
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image]
393+
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video]
394+
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-image]
394395
test_e2e.py::test_llama_e2e[use_cpp_session-remove_input_padding-]
395396
test_e2e.py::test_llama_e2e[use_py_session-remove_input_padding-]
396397
test_e2e.py::test_llama_e2e[use_py_session--]

tests/integration/test_lists/test-db/l0_h100.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -242,8 +242,8 @@ l0_h100:
242242
- accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_ngram[llguidance]
243243
- test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image-True]
244244
- test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-mixture_text_image-True]
245-
- test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-0.6-image]
246-
- test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-0.6-image]
245+
- test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image]
246+
- test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image]
247247
- condition:
248248
ranges:
249249
system_gpu_count:

0 commit comments

Comments
 (0)