[None][chore] AutoDeploy: replace HF's deprecated keyword torch_dtype --> dtype (NVIDIA#8510)

lucaslie · web-flow · commit 9b54b3bfaf32 · 2025-10-21T17:07:06.000-04:00
Signed-off-by: Lucas Liebenwein <11156568+lucaslie@users.noreply.github.com>
diff --git a/tensorrt_llm/_torch/auto_deploy/models/hf.py b/tensorrt_llm/_torch/auto_deploy/models/hf.py
@@ -107,15 +107,6 @@ def __init__(self, *args, **kwargs):
             self.model_kwargs,
         )
 
-        # special handling for torch_dtype in model_kwargs since HF does not correctly update
-        # torch_dtype string to an actual torch.dtype object (only with default)
-        if "torch_dtype" in self.model_kwargs:
-            dtype = self.model_kwargs["torch_dtype"]
-            if isinstance(dtype, str):
-                dtype = getattr(torch, self.model_kwargs["torch_dtype"])
-            assert isinstance(dtype, torch.dtype), f"Invalid dtype: {dtype}"
-            self.model_kwargs["torch_dtype"] = dtype
-
         # set sharding config source to huggingface
         self._sharding_config["source"] = ShardingConfigSource.HUGGINGFACE
 
@@ -159,6 +150,16 @@ def _recursive_update_config(
                 setattr(config, key, updated_value)
                 if child_unused:
                     nested_unused_kwargs[key] = child_unused
+            elif (
+                key in ["torch_dtype", "dtype"]
+                and isinstance(value_new, str)
+                and value_new != "auto"
+            ):
+                # check special handling of torch_dtype (DEPRECATED!) and dtype key to ensure we
+                # use the correct torch.dtype object instead of a string.
+                dtype = getattr(torch, value_new)
+                assert isinstance(dtype, torch.dtype), f"Invalid {dtype=}"
+                setattr(config, key, dtype)
             else:
                 # Direct update for simple values
                 setattr(config, key, value_new)
@@ -278,7 +279,7 @@ def build_and_load_model(self, device: DeviceLikeType) -> nn.Module:
                 "trust_remote_code": True,
                 "tp_plan": "auto",
                 **unused_kwargs,
-                "torch_dtype": "auto",  # takes precedence over unused_kwargs!
+                "dtype": "auto",  # takes precedence over unused_kwargs!
             },
         )
         model.eval()
diff --git a/tests/unittest/_torch/auto_deploy/_utils_test/_model_test_utils.py b/tests/unittest/_torch/auto_deploy/_utils_test/_model_test_utils.py
@@ -465,7 +465,7 @@ def apply_rotary_pos_emb_ds(q, k, cos, sin, position_ids, unsqueeze_dim=1):
     "ibm-ai-platform/Bamba-9B-v2": {
         "llm_models_subdir": "Bamba-9B-v2",
         "model_kwargs": {
-            "torch_dtype": "bfloat16",
+            "dtype": "bfloat16",
             "hidden_size": 64,
             "intermediate_size": 128,
             "mamba_chunk_size": 64,
@@ -484,7 +484,7 @@ def apply_rotary_pos_emb_ds(q, k, cos, sin, position_ids, unsqueeze_dim=1):
     "nvidia/NVIDIA-Nemotron-Nano-12B-v2": {
         "llm_models_subdir": "NVIDIA-Nemotron-Nano-12B-v2",
         "model_kwargs": {
-            "torch_dtype": "bfloat16",
+            "dtype": "bfloat16",
             "hidden_size": 32,
             "intermediate_size": 64,
             "mamba_head_dim": 40,
diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_hybrid_patches.py b/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_hybrid_patches.py
@@ -48,7 +48,7 @@ def test_bamba_patches(model_dir: str, run_verify_generation: bool):
             **common_kwargs,
             "model_kwargs": {
                 "use_cache": use_cache,
-                "torch_dtype": "bfloat16",
+                "dtype": "bfloat16",
             },
         }
     llm_args = AutoDeployConfig(**llm_args)

Original file line number	Diff line number	Diff line change
`@@ -48,7 +48,7 @@ def test_bamba_patches(model_dir: str, run_verify_generation: bool):`
`48`	`48`	`**common_kwargs,`
`49`	`49`	`"model_kwargs": {`
`50`	`50`	`"use_cache": use_cache,`
`51`		`- "torch_dtype": "bfloat16",`
	`51`	`+ "dtype": "bfloat16",`
`52`	`52`	`},`
`53`	`53`	`}`
`54`	`54`	`llm_args = AutoDeployConfig(**llm_args)`