@@ -143,27 +143,30 @@ def main(ctx, model: str, tokenizer: Optional[str],
         "kv_cache_config": kv_cache_config,
     }
 
-    if extra_llm_api_options is not None:
-        llm_args = update_llm_args_with_extra_options(llm_args,
-                                                      extra_llm_api_options)
-
-    profiler.start("trtllm init")
     if backend == 'pytorch':
-        llm = PyTorchLLM(**llm_args,
-                         max_batch_size=max_batch_size,
-                         max_num_tokens=max_num_tokens,
-                         max_beam_width=max_beam_width,
-                         max_seq_len=max_seq_len)
+        llm_cls = PyTorchLLM
+        llm_args.update(max_batch_size=max_batch_size,
+                        max_num_tokens=max_num_tokens,
+                        max_beam_width=max_beam_width,
+                        max_seq_len=max_seq_len)
     elif backend == 'tensorrt':
+        llm_cls = LLM
         build_config = BuildConfig(max_batch_size=max_batch_size,
                                    max_num_tokens=max_num_tokens,
                                    max_beam_width=max_beam_width,
                                    max_seq_len=max_seq_len)
-        llm = LLM(**llm_args, build_config=build_config)
+        llm_args.update(build_config=build_config)
     else:
         raise click.BadParameter(
             f"{backend} is not a known backend, check help for available options.",
             param_hint="backend")
+
+    if extra_llm_api_options is not None:
+        llm_args = update_llm_args_with_extra_options(llm_args,
+                                                      extra_llm_api_options)
+
+    profiler.start("trtllm init")
+    llm = llm_cls(**llm_args)
     profiler.stop("trtllm init")
     elapsed_time = profiler.elapsed_time_in_sec("trtllm init")
     logger.info(f"TRTLLM initialization time: {elapsed_time:.3f} seconds.")
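
The net effect of the hunk: each backend branch now only selects a class and folds its backend-specific arguments into `llm_args`, so `extra_llm_api_options` is applied after those defaults, and a single `llm_cls(**llm_args)` call replaces the two per-backend constructors. Below is a minimal, self-contained sketch of this select-then-construct pattern; `LLM`, `PyTorchLLM`, and `update_llm_args_with_extra_options` are stubs standing in for the real `tensorrt_llm` objects, and their bodies are assumptions made for illustration, not the library's actual implementation.

```python
# Sketch of the select-then-construct pattern from the diff above.
# LLM, PyTorchLLM, and update_llm_args_with_extra_options are stand-ins
# for the real tensorrt_llm objects; only the control flow mirrors the hunk.
from typing import Optional


class LLM:
    """Stub for the TensorRT-backend LLM class."""

    def __init__(self, **kwargs):
        self.kwargs = kwargs


class PyTorchLLM(LLM):
    """Stub for the PyTorch-backend LLM class."""


def update_llm_args_with_extra_options(llm_args: dict, path: str) -> dict:
    # Stand-in: the real helper merges options read from a file at `path`
    # into llm_args. Here we only record that it ran.
    llm_args["extra_options_file"] = path
    return llm_args


def build_llm(backend: str,
              llm_args: dict,
              extra_llm_api_options: Optional[str] = None) -> LLM:
    # 1) Select the class and fold backend-specific settings into one dict,
    #    instead of constructing the object inside each branch.
    if backend == "pytorch":
        llm_cls = PyTorchLLM
        llm_args.update(max_batch_size=2048, max_num_tokens=8192)
    elif backend == "tensorrt":
        llm_cls = LLM
        llm_args.update(build_config={"max_batch_size": 2048})
    else:
        raise ValueError(f"{backend} is not a known backend")

    # 2) Apply user-supplied overrides after the backend defaults, matching
    #    the diff's reordering: extra options can now win over CLI values.
    if extra_llm_api_options is not None:
        llm_args = update_llm_args_with_extra_options(llm_args,
                                                      extra_llm_api_options)

    # 3) One construction site serves both backends.
    return llm_cls(**llm_args)


if __name__ == "__main__":
    llm = build_llm("pytorch", {"model": "example"})
    print(type(llm).__name__, llm.kwargs)
```

The reordering is the behavioral change worth noting: before the diff, extra options were merged before the backend branch filled in `max_batch_size` and friends; after it, they are merged last and can override those values.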