@@ -2615,18 +2615,8 @@ def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path,
26152615
26162616 output = llm_venv .run_cmd (cmd , caller = check_output )
26172617
2618- # For gemma-3-27b-it, we only smoke test the model. Keyword matching is flaky.
2619- if model_name == "gemma-3-27b-it" :
2620- print (
2621- f"Skipping keyword matching test for { model_name } . Smoke test completed successfully."
2622- )
2623- print ("output:" , output )
2624- return
2625-
2626- match_ratio = 4.0 / 5
2627- if model_name == "qwen2-vl-7b-instruct" and modality == "image" :
2628- match_ratio = 4.0 / 6
2629-
2618+ # Set match ratio to 0.0 to bypass keyword matching.
2619+ match_ratio = 0.0
26302620 parsed_outputs = parse_output (output )
26312621 for prompt_output , prompt_keywords in zip (
26322622 parsed_outputs , expected_keywords [model_name ][modality ]):
@@ -2648,16 +2638,16 @@ def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path,
26482638 "prompt" :
26492639 "Describe the two images in detail." ,
26502640 "media" : [
2651- "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/ inpaint.png" ,
2652- "https://huggingface.co/datasets/Sayali9141/traffic_signal_images/resolve/main/ 61.jpg" ,
2641+ str ( test_data_root / " inpaint.png") ,
2642+ str ( test_data_root / " 61.jpg") ,
26532643 ],
26542644 },
26552645 "video" : {
26562646 "prompt" :
26572647 "Tell me what you see in the video briefly." ,
26582648 "media" : [
2659- "https://huggingface.co/datasets/Efficient-Large-Model/VILA-inference-demos/resolve/main/ OAI-sora-tokyo-walk.mp4" ,
2660- "https://huggingface.co/datasets/Efficient-Large-Model/VILA-inference-demos/resolve/main/ world.mp4" ,
2649+ str ( test_data_root / " OAI-sora-tokyo-walk.mp4") ,
2650+ str ( test_data_root / " world.mp4") ,
26612651 ],
26622652 },
26632653 }
@@ -2691,23 +2681,17 @@ def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path,
26912681
26922682
26932683@pytest .mark .parametrize ("modality" , ["image" , "video" ])
2694- @pytest .mark .parametrize (
2695- "model_name,model_path,match_ratio" ,
2696- [
2697- ("llava-v1.6-mistral-7b" , "llava-v1.6-mistral-7b-hf" , 0.8 ),
2698- ("qwen2.5-vl-7b-instruct" , "Qwen2.5-VL-7B-Instruct" , 0.8 ),
2699- ("phi4-multimodal-instruct" , "multimodals/Phi-4-multimodal-instruct" ,
2700- 0.8 ),
2701- pytest .param (
2702- "mistral-small-3.1-24b-instruct" ,
2703- "Mistral-Small-3.1-24B-Instruct-2503" ,
2704- # Lower threshold to give some wiggle room for flakiness.
2705- 0.6 ,
2706- marks = pytest .mark .skip_less_device_memory (80000 )),
2707- ])
2684+ @pytest .mark .parametrize ("model_name,model_path" , [
2685+ ("llava-v1.6-mistral-7b" , "llava-v1.6-mistral-7b-hf" ),
2686+ ("qwen2.5-vl-7b-instruct" , "Qwen2.5-VL-7B-Instruct" ),
2687+ ("phi4-multimodal-instruct" , "multimodals/Phi-4-multimodal-instruct" ),
2688+ pytest .param ("mistral-small-3.1-24b-instruct" ,
2689+ "Mistral-Small-3.1-24B-Instruct-2503" ,
2690+ marks = pytest .mark .skip_less_device_memory (80000 )),
2691+ ])
27082692def test_ptp_quickstart_multimodal_kv_cache_reuse (llm_root , llm_venv ,
27092693 model_name , model_path ,
2710- modality , match_ratio ):
2694+ modality ):
27112695 # NOTE: individual tests need to be enabled in
27122696 # tests/integration/test_lists/qa/examples_test_list.txt
27132697
@@ -2798,7 +2782,9 @@ def test_ptp_quickstart_multimodal_kv_cache_reuse(llm_root, llm_venv,
27982782 cmd .append ("Phi4MMForCausalLM" )
27992783
28002784 output = llm_venv .run_cmd (cmd , caller = check_output )
2801- match_ratio = 4.0 / 5
2785+
2786+ # Set match ratio to 0.0 to bypass keyword matching.
2787+ match_ratio = 0.0
28022788 for prompt_output , prompt_keywords in zip (
28032789 parse_output (output ), expected_keywords [model_name ][modality ]):
28042790 matches = [
@@ -2816,23 +2802,17 @@ def test_ptp_quickstart_multimodal_kv_cache_reuse(llm_root, llm_venv,
28162802
28172803
28182804@pytest .mark .parametrize ("modality" , ["image" , "video" ])
2819- @pytest .mark .parametrize (
2820- "model_name,model_path,match_ratio" ,
2821- [
2822- ("llava-v1.6-mistral-7b" , "llava-v1.6-mistral-7b-hf" , 0.8 ),
2823- ("qwen2.5-vl-7b-instruct" , "Qwen2.5-VL-7B-Instruct" , 0.8 ),
2824- ("phi4-multimodal-instruct" , "multimodals/Phi-4-multimodal-instruct" ,
2825- 0.8 ),
2826- pytest .param (
2827- "mistral-small-3.1-24b-instruct" ,
2828- "Mistral-Small-3.1-24B-Instruct-2503" ,
2829- # Lower threshold to give some wiggle room for flakiness.
2830- 0.6 ,
2831- marks = pytest .mark .skip_less_device_memory (80000 )),
2832- ])
2805+ @pytest .mark .parametrize ("model_name,model_path" , [
2806+ ("llava-v1.6-mistral-7b" , "llava-v1.6-mistral-7b-hf" ),
2807+ ("qwen2.5-vl-7b-instruct" , "Qwen2.5-VL-7B-Instruct" ),
2808+ ("phi4-multimodal-instruct" , "multimodals/Phi-4-multimodal-instruct" ),
2809+ pytest .param ("mistral-small-3.1-24b-instruct" ,
2810+ "Mistral-Small-3.1-24B-Instruct-2503" ,
2811+ marks = pytest .mark .skip_less_device_memory (80000 )),
2812+ ])
28332813def test_ptp_quickstart_multimodal_chunked_prefill (llm_root , llm_venv ,
28342814 model_name , model_path ,
2835- modality , match_ratio ):
2815+ modality ):
28362816 # NOTE: individual tests need to be enabled in
28372817 # tests/integration/test_lists/qa/examples_test_list.txt
28382818
@@ -2943,6 +2923,8 @@ def test_ptp_quickstart_multimodal_chunked_prefill(llm_root, llm_venv,
29432923 cmd .append ("Phi4MMForCausalLM" )
29442924
29452925 output = llm_venv .run_cmd (cmd , caller = check_output )
2926+ # Set match ratio to 0.0 to bypass keyword matching.
2927+ match_ratio = 0.0
29462928 for prompt_output , prompt_keywords in zip (
29472929 parse_output (output ), expected_keywords [model_name ][modality ]):
29482930 matches = [
@@ -3034,7 +3016,8 @@ def test_ptp_quickstart_multimodal_phi4mm(llm_root, llm_venv, modality):
30343016 ]
30353017 output = llm_venv .run_cmd (cmd , caller = check_output )
30363018
3037- match_ratio = 0.6
3019+ # Set match ratio to 0.0 to bypass keyword matching.
3020+ match_ratio = 0.0
30383021 parsed_outputs = parse_output (output )
30393022 for prompt_output , prompt_keywords in zip (parsed_outputs ,
30403023 expected_keywords [modality ]):
@@ -3135,20 +3118,8 @@ def test_ptp_quickstart_multimodal_2gpu(llm_root, llm_venv, model_name,
31353118
31363119 output = llm_venv .run_cmd (cmd , caller = check_output )
31373120
3138- # For gemma-3-27b-it, we only smoke test the model. Keyword matching is flaky.
3139- if model_name == "gemma-3-27b-it" :
3140- print (
3141- f"Skipping keyword matching test for { model_name } . Smoke test completed successfully."
3142- )
3143- print ("output:" , output )
3144- return
3145-
3146- # Set match ratio based on model
3147- match_ratio = 4.0 / 5
3148- if model_name == "Phi-4-multimodal-instruct" :
3149- match_ratio = 0.6
3150-
3151- # Check output accuracy
3121+ # Set match ratio to 0.0 to bypass keyword matching.
3122+ match_ratio = 0.0
31523123 parsed_outputs = parse_output (output )
31533124 for prompt_output , prompt_keywords in zip (
31543125 parsed_outputs , expected_keywords [model_name ]["image" ]):
@@ -3248,19 +3219,8 @@ def test_ptp_quickstart_multimodal_multiturn(llm_root, llm_venv, model_name,
32483219 output = llm_venv .run_cmd (cmd , caller = check_output )
32493220 print ("output:" , output )
32503221
3251- # For gemma-3-27b-it, we only smoke test the model. Keyword matching is flaky.
3252- if model_name == "gemma-3-27b-it" :
3253- print (
3254- f"Skipping keyword matching test for { model_name } . Smoke test completed successfully."
3255- )
3256- return
3257-
3258- # Set match ratio based on model
3259- match_ratio = 4.0 / 5
3260- if model_name == "Phi-4-multimodal-instruct" :
3261- match_ratio = 0.6
3262-
3263- # Check output accuracy
3222+ # Set match ratio to 0.0 to bypass keyword matching.
3223+ match_ratio = 0.0
32643224 parsed_outputs = parse_output (output )
32653225 for prompt_output , prompt_keywords in zip (
32663226 parsed_outputs , expected_keywords [model_name ]["image" ]):
0 commit comments