
Commit ab4b996

[TRTLLM-7287][test] add multimodal chunked_prefill cases (#8011)

Authored by ruodil and LarryXFly.

Signed-off-by: Ruodi Lu <[email protected]>
Co-authored-by: Ruodi Lu <[email protected]>
Co-authored-by: Larry Xu <[email protected]>

1 parent 4545700, commit ab4b996

File tree: 2 files changed (+26, -0 lines)

tests/integration/defs/perf/pytorch_model_config.py

Lines changed: 20 additions & 0 deletions

```diff
@@ -221,6 +221,26 @@ def get_model_yaml_config(model_label: str,
             'stream_interval': 10,
             'num_postprocess_workers': 4
         }
+    },
+    # Phi-4-multimodal-instruct with chunked prefill and kv_cache_reuse
+    {
+        'patterns': [
+            'phi_4_multimodal_instruct-bench-pytorch-bfloat16-maxbs:48-maxnt:256-input_output_len:500,2000-con:250',
+            'phi_4_multimodal_instruct-bench-pytorch-bfloat16-maxbs:128-maxnt:512-input_output_len:1000,1000-con:250'
+        ],
+        'config': {
+            'enable_chunked_prefill': True,
+        }
+    },
+    # Mistral-Small-3.1-24B-Instruct-2503 with chunked prefill and kv_cache_reuse
+    {
+        'patterns': [
+            'mistral_small_v3.1_24b-bench-pytorch-bfloat16-maxbs:48-maxnt:256-input_output_len:1000,2000-reqs:500-con:200',
+            'mistral_small_v3.1_24b-bench-pytorch-bfloat16-maxbs:128-maxnt:512-input_output_len:1000,2000-reqs:500-con:200'
+        ],
+        'config': {
+            'enable_chunked_prefill': True,
+        }
     }
 ]
```

tests/integration/test_lists/qa/llm_perf_core.yml

Lines changed: 6 additions & 0 deletions

```diff
@@ -39,6 +39,9 @@ llm_perf_core:
   - perf/test_perf.py::test_perf[phi_4_multimodal_instruct-bench-pytorch-bfloat16-input_output_len:1000,1000-con:250]
   - perf/test_perf.py::test_perf[phi_4_multimodal_instruct-bench-pytorch-bfloat16-input_output_len:128,128]
   - perf/test_perf.py::test_perf[phi_4_multimodal_instruct-bench-pytorch-bfloat16-input_output_len:512,32]
+  # Phi-4-multimodal-instruct with chunked prefill and kv_cache_reuse
+  - perf/test_perf.py::test_perf[phi_4_multimodal_instruct-bench-pytorch-bfloat16-maxbs:48-maxnt:256-input_output_len:500,2000-con:250]
+  - perf/test_perf.py::test_perf[phi_4_multimodal_instruct-bench-pytorch-bfloat16-maxbs:128-maxnt:512-input_output_len:1000,1000-con:250]
   # Bielik-11B-v2.2-Instruct
   - perf/test_perf.py::test_perf[bielik_11b_v2.2_instruct-bench-pytorch-bfloat16-input_output_len:128,128]
   - perf/test_perf.py::test_perf[bielik_11b_v2.2_instruct-bench-pytorch-bfloat16-input_output_len:512,32]
@@ -52,6 +55,9 @@ llm_perf_core:
   #Mistral-Small-3.1-24B-Instruct-2503
   - perf/test_perf.py::test_perf[mistral_small_v3.1_24b-bench-pytorch-bfloat16-maxbs:1-input_output_len:1000,2000-reqs:8-con:1]
   - perf/test_perf.py::test_perf[mistral_small_v3.1_24b-bench-pytorch-bfloat16-input_output_len:1000,2000-reqs:500-con:200]
+  # Mistral-Small-3.1-24B-Instruct-2503 with chunked prefill and kv_cache_reuse
+  - perf/test_perf.py::test_perf[mistral_small_v3.1_24b-bench-pytorch-bfloat16-maxbs:48-maxnt:256-input_output_len:1000,2000-reqs:500-con:200]
+  - perf/test_perf.py::test_perf[mistral_small_v3.1_24b-bench-pytorch-bfloat16-maxbs:128-maxnt:512-input_output_len:1000,2000-reqs:500-con:200]

   # Test list validation
   - test_list_validation.py::test_list_validation
```
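The bracketed part of each test ID is a dash-separated benchmark label whose options use a `key:value` form (e.g. `maxbs:48`, `input_output_len:1000,2000`). A hedged sketch of how such a label could be decomposed — `parse_label` is an illustrative helper, not a function from the test suite:

```python
# Hypothetical parser for the benchmark-label grammar seen in these test IDs.
# Segments without a colon (model name, backend, dtype) are treated as flags;
# "key:value" segments become options. Illustrative only.
def parse_label(label: str) -> dict:
    options = {}
    for segment in label.split('-'):
        if ':' in segment:
            key, value = segment.split(':', 1)
            options[key] = value
    return options


opts = parse_label(
    'mistral_small_v3.1_24b-bench-pytorch-bfloat16-maxbs:48-maxnt:256'
    '-input_output_len:1000,2000-reqs:500-con:200')
```

This splitting works here because option values use commas, never dashes, so `split('-')` keeps each `key:value` pair intact.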
