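Apply review comments: mark model_kwargs as prototype, drop the function-local logger import, and parametrize the model_kwargs tests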

taylor-yb-lee · taylor-yb-lee · commit 3d589a11e719 · 2026-01-06T00:58:21.000-08:00
Signed-off-by: Taylor Yeonbok Lee <249374542+taylor-yb-lee@users.noreply.github.com>
diff --git a/tensorrt_llm/_torch/model_config.py b/tensorrt_llm/_torch/model_config.py
@@ -456,7 +456,6 @@ def cached_file(path_or_repo_id, file_name):
         # Apply model_kwargs to override config parameters if provided
         model_kwargs = kwargs.pop('model_kwargs', None)
         if model_kwargs:
-            from tensorrt_llm.logger import logger
 
             def _recursive_update_config(config: transformers.PretrainedConfig,
                                          update_dict: Dict[str, Any]):
diff --git a/tensorrt_llm/llmapi/llm_args.py b/tensorrt_llm/llmapi/llm_args.py
@@ -1912,7 +1912,7 @@ class BaseLlmArgs(StrictBaseModel):
         description="Optional parameters overriding model config defaults. "
         "Precedence: (1) model_kwargs, (2) model config file, (3) model config class defaults. "
         "Unknown keys are ignored",
-        status="beta")
+        status="prototype")
 
     pipeline_parallel_size: int = Field(
         default=1, description="The pipeline parallel size.")
diff --git a/tests/unittest/api_stability/references/llm.yaml b/tests/unittest/api_stability/references/llm.yaml
@@ -228,9 +228,9 @@ methods:
         default: null
         status: prototype
       model_kwargs:
-        annotation: object
+        annotation: Optional[Dict[str, Any]]
         default: null
-        status: beta
+        status: prototype
     return_annotation: None
   generate:
     parameters:
diff --git a/tests/unittest/llmapi/test_llm_args.py b/tests/unittest/llmapi/test_llm_args.py
@@ -138,28 +138,17 @@ def test_llm_args_with_pydantic_options(self):
         assert llm_args.max_num_tokens == 256
         assert llm_args.max_seq_len == 128
 
-    def test_llm_args_with_model_kwargs_trt(self):
+    @pytest.mark.parametrize("llm_args_cls", [TrtLlmArgs, TorchLlmArgs])
+    def test_llm_args_with_model_kwargs(self, llm_args_cls):
         yaml_content = """
 model_kwargs:
     num_hidden_layers: 2
     """
         dict_content = self._yaml_to_dict(yaml_content)
-        llm_args = TrtLlmArgs(model=llama_model_path)
+        llm_args = llm_args_cls(model=llama_model_path)
         llm_args_dict = update_llm_args_with_extra_dict(llm_args.model_dump(),
                                                         dict_content)
-        llm_args = TrtLlmArgs(**llm_args_dict)
-        assert llm_args.model_kwargs['num_hidden_layers'] == 2
-
-    def test_llm_args_with_model_kwargs_pt(self):
-        yaml_content = """
-model_kwargs:
-    num_hidden_layers: 2
-    """
-        dict_content = self._yaml_to_dict(yaml_content)
-        llm_args = TorchLlmArgs(model=llama_model_path)
-        llm_args_dict = update_llm_args_with_extra_dict(llm_args.model_dump(),
-                                                        dict_content)
-        llm_args = TorchLlmArgs(**llm_args_dict)
+        llm_args = llm_args_cls(**llm_args_dict)
         assert llm_args.model_kwargs['num_hidden_layers'] == 2
 
 
@@ -473,12 +462,13 @@ def test_dynamic_setattr(self):
     def test_model_kwargs_with_num_hidden_layers(self):
         """Test that model_kwargs can override num_hidden_layers."""
         from tensorrt_llm._torch.model_config import ModelConfig
-
+        config_no_kwargs = ModelConfig.from_pretrained(
+            llama_model_path).pretrained_config
         model_kwargs = {'num_hidden_layers': 2}
-
-        config = ModelConfig.from_pretrained(llama_model_path,
-                                             model_kwargs=model_kwargs)
-        assert config.pretrained_config.num_hidden_layers == 2
+        config_with_kwargs = ModelConfig.from_pretrained(
+            llama_model_path, model_kwargs=model_kwargs).pretrained_config
+        assert config_no_kwargs.num_hidden_layers != config_with_kwargs.num_hidden_layers
+        assert config_with_kwargs.num_hidden_layers == 2
 
 
 class TestTrtLlmArgs: