minor

realAsma · realAsma · commit 339b7e6a65ae · 2025-09-16T20:13:42.000Z
Signed-off-by: realAsma <akuriparambi@nvidia.com>
diff --git a/modelopt/torch/quantization/plugins/transformers_trainer.py b/modelopt/torch/quantization/plugins/transformers_trainer.py
@@ -206,10 +206,10 @@ def forward_loop(model):
             for batch in tqdm(data_loader, desc="Calibrating"):
                 batch = self._prepare_inputs(batch)
                 # Important: We should forward pass using the unwrapped model
-                # mtq.quantize will unwrap the model pass the unwrapped model to the forward_loop
+                # mtq.quantize will unwrap the model and pass it to the forward_loop
                 self.model(**batch)
 
-        # TODO: Remove calibrate_with_adpaters - this should not be needed
+        # TODO: Remove calibrate_with_adapters - this should not be needed
         with calibrate_with_adapters(self.model, self.args):
             print_rank_0("Quantizing the model...")
             mtq.quantize(self.model, self.quant_cfg, forward_loop)  # type: ignore [arg-type]
@@ -252,7 +252,8 @@ def train(self, *args, **kwargs):
         """Train the model."""
         outputs = super().train(*args, **kwargs)
         print_rank_0(
-            "Training completed. Do not forget to save the final model using `trainer.save_model()`."
+            "Training completed. Please save the final model using `Trainer.save_model()` "
+            "to preserve ModelOpt states."
         )
         return outputs
 
@@ -264,10 +265,17 @@ def save_model(self, *args, **kwargs):
             and self.accelerator.state.fsdp_plugin.state_dict_type != "FULL_STATE_DICT"
         ):
             print_rank_0("Setting state_dict_type to FULL_STATE_DICT for final checkpoint save.")
+            original_type = self.accelerator.state.fsdp_plugin.state_dict_type
             self.accelerator.state.fsdp_plugin.set_state_dict_type("FULL_STATE_DICT")
             outputs = super().save_model(*args, **kwargs)
-            torch.distributed.barrier()
-            print_rank_0("Saved serialized model")
+            if torch.distributed.is_initialized():
+                torch.distributed.barrier()
+            if mto.ModeloptStateManager.is_converted(self.accelerator.unwrap_model(self.model)):
+                print_rank_0(
+                    "Model saved. To restore, call mto.enable_huggingface_checkpointing() first before loading the "
+                    "model. See https://nvidia.github.io/TensorRT-Model-Optimizer/reference/generated/modelopt.torch.opt.plugins.huggingface.html#modelopt.torch.opt.plugins.huggingface.enable_huggingface_checkpointing"
+                )
+            self.accelerator.state.fsdp_plugin.set_state_dict_type(original_type)
         else:
             outputs = super().save_model(*args, **kwargs)
         return outputs