Commit 479f729

Save model in original dtype for QAT example (#546)
Pick change in #531 to main.

## What does this PR do?

**Type of change:** ?

**Overview:** ?

## Usage

```python
# Add a code snippet demonstrating how to use this
```

## Testing

## Before your PR is "*Ready for review*"

- **Make sure you read and follow [Contributor guidelines](https://github.com/NVIDIA/TensorRT-Model-Optimizer/blob/main/CONTRIBUTING.md)** and your commits are signed.
- **Is this change backward compatible?**: Yes/No
- **Did you write any new necessary tests?**: Yes/No
- **Did you add or update any necessary documentation?**: Yes/No
- **Did you update [Changelog](https://github.com/NVIDIA/TensorRT-Model-Optimizer/blob/main/CHANGELOG.rst)?**: Yes/No

## Additional Information

---------

Signed-off-by: Fridah-nv <[email protected]>
1 parent 8188a01 commit 479f729

File tree

1 file changed: +53 -15 lines changed


modelopt/torch/quantization/plugins/transformers_trainer.py

Lines changed: 53 additions & 15 deletions
```diff
@@ -16,6 +16,7 @@
 """ModelOpt plugin for transformers Trainer."""

 import gc
+import json
 import os
 import types
 from dataclasses import dataclass, field
```
```diff
@@ -168,6 +169,10 @@ def __init__(
         elif is_quantized(self.model):
             self._save_modelopt_state_with_weights()

+        self._original_dtype = getattr(
+            getattr(self.model, "config", None), "dtype", None
+        ) or getattr(getattr(self.model, "config", None), "torch_dtype", None)
+
     def _save_modelopt_state_with_weights(self):
         """Save the modelopt weights for fsdp2 models."""
         if torch.distributed.is_initialized():
```
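A minimal sketch (not part of the diff) of how the dtype captured here later becomes the string written to `config.json`; `DummyConfig` is a hypothetical stand-in for a Hugging Face `PretrainedConfig` that carries `torch_dtype` (or `dtype` in newer transformers versions):

```python
# Sketch only: the same getattr chain the trainer uses, on a stand-in config.
import torch


class DummyConfig:
    """Stand-in for the config of a model loaded in bfloat16."""

    torch_dtype = torch.bfloat16


config = DummyConfig()
# Prefer `dtype`, fall back to `torch_dtype`, as in the trainer __init__.
original_dtype = getattr(config, "dtype", None) or getattr(config, "torch_dtype", None)
print(original_dtype)                     # torch.bfloat16
print(str(original_dtype).split(".")[1])  # "bfloat16" -> string later written to config.json
```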
```diff
@@ -256,23 +261,30 @@ def train(self, *args, **kwargs):

     def save_model(self, *args, **kwargs):
         """Save the quantized model."""
-        if (
-            (not self.is_in_train)
-            and self.is_fsdp_enabled
-            and self.accelerator.state.fsdp_plugin.state_dict_type != "FULL_STATE_DICT"
-        ):
-            print_rank_0("Setting state_dict_type to FULL_STATE_DICT for final checkpoint save.")
-            original_type = self.accelerator.state.fsdp_plugin.state_dict_type
-            self.accelerator.state.fsdp_plugin.set_state_dict_type("FULL_STATE_DICT")
-            outputs = super().save_model(*args, **kwargs)
-            if torch.distributed.is_initialized():
-                torch.distributed.barrier()
-            if mto.ModeloptStateManager.is_converted(self.accelerator.unwrap_model(self.model)):
+        if not self.is_in_train:
+            if (
+                self.is_fsdp_enabled
+                and self.accelerator.state.fsdp_plugin.state_dict_type != "FULL_STATE_DICT"
+            ):
                 print_rank_0(
-                    "Model saved. To restore, call mto.enable_huggingface_checkpointing() first before loading the "
-                    "model. See https://nvidia.github.io/TensorRT-Model-Optimizer/reference/generated/modelopt.torch.opt.plugins.huggingface.html#modelopt.torch.opt.plugins.huggingface.enable_huggingface_checkpointing"
+                    "Setting state_dict_type to FULL_STATE_DICT for final checkpoint save."
                 )
-            self.accelerator.state.fsdp_plugin.set_state_dict_type(original_type)
+                original_type = self.accelerator.state.fsdp_plugin.state_dict_type
+                self.accelerator.state.fsdp_plugin.set_state_dict_type("FULL_STATE_DICT")
+            outputs = super().save_model(*args, **kwargs)
+            if torch.distributed.is_initialized():
+                torch.distributed.barrier()
+            if mto.ModeloptStateManager.is_converted(self.accelerator.unwrap_model(self.model)):
+                print_rank_0(
+                    "Model saved. To restore, call mto.enable_huggingface_checkpointing() first before loading the "
+                    "model. See https://nvidia.github.io/TensorRT-Model-Optimizer/reference/generated/modelopt.torch.opt.plugins.huggingface.html#modelopt.torch.opt.plugins.huggingface.enable_huggingface_checkpointing"
+                )
+                self.accelerator.state.fsdp_plugin.set_state_dict_type(original_type)
+            if self.args.should_save:
+                out_dir = args[0]
+                # FSDP may upcast parameter dtype to float32 during mixed-precision training,
+                # we convert it back to original dtype by updating `torch-dtype` in `config.json`
+                self._update_config_json_dtype(out_dir, str(self._original_dtype).split(".")[1])
         else:
             outputs = super().save_model(*args, **kwargs)
         return outputs
```
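The restore flow referenced by the message above, sketched under the assumption of a causal-LM checkpoint; `qat_ckpt/` is a placeholder output directory, not a path from the PR:

```python
# Sketch: reloading a checkpoint written by save_model above.
import modelopt.torch.opt as mto
from transformers import AutoModelForCausalLM

# Register ModelOpt's HuggingFace checkpointing hooks before from_pretrained so the
# ModelOpt (quantizer) state saved alongside the weights is restored as well.
mto.enable_huggingface_checkpointing()
model = AutoModelForCausalLM.from_pretrained("qat_ckpt/")  # placeholder path
```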
```diff
@@ -296,6 +308,32 @@ def _load_best_model(self, *args, **kwargs):
         else:
             super()._load_best_model(*args, **kwargs)

+    def _update_config_json_dtype(self, output_dir: str, dtype_str: str | None) -> None:
+        """Rewrite <output_dir>/config.json 'dtype' (preferred) or 'torch_dtype' to dtype_str."""
+        cfg_path = os.path.join(output_dir, "config.json")
+        if not os.path.isfile(cfg_path):
+            print_rank_0(f"[warn] config.json not found under {output_dir}; skip dtype rewrite.")
+            return
+        try:
+            with open(cfg_path, encoding="utf-8") as f:
+                data = json.load(f)
+            # Prefer 'dtype', else fall back to 'torch_dtype'
+            key_to_update = (
+                "dtype" if "dtype" in data else ("torch_dtype" if "torch_dtype" in data else None)
+            )
+            if key_to_update is None:
+                print_rank_0(
+                    "[warn] Neither 'dtype' nor 'torch_dtype' present in config.json; skip dtype rewrite."
+                )
+                return
+            if data.get(key_to_update) != dtype_str:
+                data[key_to_update] = dtype_str
+                with open(cfg_path, "w", encoding="utf-8") as f:
+                    json.dump(data, f, ensure_ascii=False, indent=2)
+                print_rank_0(f'Updated config.json: {key_to_update} -> "{dtype_str}"')
+        except Exception as e:
+            print_rank_0(f"[warn] Failed to update dtype in config.json: {e}")
+
     def _patch_accelerate_for_fsdp2_fix(self):
         """Fixes for accelerate prepare.

```
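A self-contained illustration (separate from the trainer code, with made-up config values) of the rewrite this helper performs on a checkpoint's `config.json`, assuming an FSDP mixed-precision run left `"torch_dtype": "float32"` behind:

```python
# Sketch: reproduce the config.json dtype rewrite on a throwaway checkpoint dir.
import json
import tempfile
from pathlib import Path

out_dir = Path(tempfile.mkdtemp())
cfg_path = out_dir / "config.json"
cfg_path.write_text(json.dumps({"model_type": "llama", "torch_dtype": "float32"}))

data = json.loads(cfg_path.read_text())
key = "dtype" if "dtype" in data else "torch_dtype"  # same key preference as the helper
data[key] = "bfloat16"                               # e.g. str(torch.bfloat16).split(".")[1]
cfg_path.write_text(json.dumps(data, indent=2))

print(cfg_path.read_text())  # torch_dtype now reads "bfloat16"
```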