Skip to content

Commit f0b86b6

Browse files
committed
include check for the chunked prefill flag during profiling
Signed-off-by: John Calderon <jcalderon@nvidia.com>
1 parent 85ec2fc commit f0b86b6

File tree

4 files changed

+7
-9
lines changed

4 files changed

+7
-9
lines changed

tensorrt_llm/_torch/pyexecutor/_util.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -205,9 +205,11 @@ def _create_dummy_mm_context_request(
205205
def _create_dummy_context_requests(
206206
self, input_seq_len: int) -> List[trtllm.Request]:
207207
requests = []
208-
if hasattr(self._model_engine.model,
209-
"original_arch") and MODEL_CLASS_VISION_ENCODER_MAPPING.get(
210-
self._model_engine.model.original_arch, None):
208+
if hasattr(
209+
self._model_engine.model,
210+
"original_arch") and MODEL_CLASS_VISION_ENCODER_MAPPING.get(
211+
self._model_engine.model.original_arch, None
212+
) and self._model_engine.attn_runtime_features.chunked_prefill:
211213
requests = self._create_dummy_mm_context_request(input_seq_len)
212214
# if succeed profiling with multimodal requests then return, otherwise profile
213215
# with default case

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3500,7 +3500,6 @@ class TestQwen2_VL_7B(LlmapiAccuracyTestHarness):
35003500
def test_auto_dtype(self):
35013501
with LLM(self.MODEL_PATH,
35023502
max_num_tokens=16384,
3503-
enable_chunked_prefill=True,
35043503
kv_cache_config=self.kv_cache_config) as llm:
35053504
task = MMMU(self.MODEL_NAME)
35063505
task.evaluate(llm, sampling_params=self.sampling_params)

tests/unittest/llmapi/apps/_test_openai_chat_multimodal.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def server(model_name: str, temp_extra_llm_api_options_file: str):
4848
model_path = get_model_path(model_name)
4949
args = [
5050
"--extra_llm_api_options", temp_extra_llm_api_options_file,
51-
"--max_batch_size", "64", "--enable_chunked_prefill"
51+
"--max_batch_size", "64"
5252
]
5353
with RemoteOpenAIServer(model_path, args) as remote_server:
5454
yield remote_server

tests/unittest/llmapi/apps/_test_trtllm_serve_multimodal_example.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,7 @@ def temp_extra_llm_api_options_file(request):
4646
@pytest.fixture(scope="module")
4747
def server(model_name: str, temp_extra_llm_api_options_file: str):
4848
model_path = get_model_path(model_name)
49-
args = [
50-
"--enable_chunked_prefill", "--extra_llm_api_options",
51-
temp_extra_llm_api_options_file
52-
]
49+
args = ["--extra_llm_api_options", temp_extra_llm_api_options_file]
5350
with RemoteOpenAIServer(model_path, port=8000,
5451
cli_args=args) as remote_server:
5552
yield remote_server

0 commit comments

Comments (0)