Skip to content

Commit 345954c

Browse files
committed
Check the chunked-prefill flag during profiling
Signed-off-by: John Calderon <jcalderon@nvidia.com>
1 parent bcc9004 commit 345954c

File tree

4 files changed

+7
-9
lines changed

4 files changed

+7
-9
lines changed

tensorrt_llm/_torch/pyexecutor/_util.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -193,9 +193,11 @@ def _create_dummy_mm_context_request(
193193
def _create_dummy_context_requests(
194194
self, input_seq_len: int) -> List[trtllm.Request]:
195195
requests = []
196-
if hasattr(self._model_engine.model,
197-
"original_arch") and MODEL_CLASS_VISION_ENCODER_MAPPING.get(
198-
self._model_engine.model.original_arch, None):
196+
if hasattr(
197+
self._model_engine.model,
198+
"original_arch") and MODEL_CLASS_VISION_ENCODER_MAPPING.get(
199+
self._model_engine.model.original_arch, None
200+
) and self._model_engine.attn_runtime_features.chunked_prefill:
199201
requests = self._create_dummy_mm_context_request(input_seq_len)
200202
# if succeed profiling with multimodal requests then return, otherwise profile
201203
# with default case

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3550,7 +3550,6 @@ class TestQwen2_VL_7B(LlmapiAccuracyTestHarness):
35503550
def test_auto_dtype(self):
35513551
with LLM(self.MODEL_PATH,
35523552
max_num_tokens=16384,
3553-
enable_chunked_prefill=True,
35543553
kv_cache_config=self.kv_cache_config) as llm:
35553554
task = MMMU(self.MODEL_NAME)
35563555
task.evaluate(llm, sampling_params=self.sampling_params)

tests/unittest/llmapi/apps/_test_openai_chat_multimodal.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def server(model_name: str, temp_extra_llm_api_options_file: str):
4848
model_path = get_model_path(model_name)
4949
args = [
5050
"--extra_llm_api_options", temp_extra_llm_api_options_file,
51-
"--max_batch_size", "64", "--enable_chunked_prefill"
51+
"--max_batch_size", "64"
5252
]
5353
with RemoteOpenAIServer(model_path, args) as remote_server:
5454
yield remote_server

tests/unittest/llmapi/apps/_test_trtllm_serve_multimodal_example.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,7 @@ def temp_extra_llm_api_options_file(request):
4646
@pytest.fixture(scope="module")
4747
def server(model_name: str, temp_extra_llm_api_options_file: str):
4848
model_path = get_model_path(model_name)
49-
args = [
50-
"--enable_chunked_prefill", "--extra_llm_api_options",
51-
temp_extra_llm_api_options_file
52-
]
49+
args = ["--extra_llm_api_options", temp_extra_llm_api_options_file]
5350
with RemoteOpenAIServer(model_path, port=8000,
5451
cli_args=args) as remote_server:
5552
yield remote_server

0 commit comments

Comments (0)