|
21 | 21 | from dataclasses import dataclass, field
|
22 | 22 |
|
23 | 23 | import torch
|
| 24 | +from safetensors.torch import save_file |
24 | 25 | from tqdm import tqdm
|
25 | 26 |
|
26 | 27 | import modelopt.torch.opt as mto
|
27 | 28 | import modelopt.torch.quantization as mtq
|
28 | 29 | from modelopt.torch.distill import KDLossConfig
|
29 | 30 | from modelopt.torch.distill.mode import _convert_for_kd
|
30 | 31 | from modelopt.torch.distill.plugins.huggingface import KDTrainer
|
31 |
| -from modelopt.torch.export.unified_export_hf import export_hf_checkpoint |
32 | 32 | from modelopt.torch.opt.conversion import restore_from_modelopt_state
|
33 | 33 | from modelopt.torch.opt.plugins import ModelOptHFTrainer
|
34 | 34 | from modelopt.torch.quantization.config import QuantizeConfig
|
@@ -182,6 +182,18 @@ def _save_modelopt_state_with_weights(self):
|
182 | 182 |
|
183 | 183 | print_rank_0(f"Saved modelopt state to {self._modelopt_state_path}")
|
184 | 184 |
|
| 185 | + # Save base model compressed weights for QLoRA |
| 186 | + if getattr(self.quant_args, "compress", False): |
| 187 | + # Save base model config.json |
| 188 | + self.model.config.save_pretrained(self.args.output_dir) |
| 189 | + |
| 190 | + # Save base model compressed weights, excluding LoRA adapter weights |
| 191 | + state_dict = self.model.state_dict() |
| 192 | + for k in [key for key in state_dict if "lora" in key]: |
| 193 | + del state_dict[k] |
| 194 | + |
| 195 | + save_file(state_dict, f"{self.args.output_dir}/model.safetensors") |
| 196 | + |
185 | 197 | def _restore_modelopt_state_with_weights(self):
|
186 | 198 | modelopt_state = torch.load(self._modelopt_state_path, weights_only=False)
|
187 | 199 | modelopt_weights = modelopt_state.pop("modelopt_state_weights", None)
|
@@ -288,12 +300,6 @@ def _load_best_model(self, *args, **kwargs):
|
288 | 300 | self.model.delete_adapter(adapter_name)
|
289 | 301 | self.model.load_adapter(self.state.best_model_checkpoint, adapter_name)
|
290 | 302 |
|
291 |
| - def export_base_model(self): |
292 |
| - """Export the basemodel to HF checkpoint for deployment.""" |
293 |
| - # Save config.json |
294 |
| - if self.accelerator.is_main_process: |
295 |
| - export_hf_checkpoint(self.model, export_dir=f"{self.args.output_dir}/base_model") |
296 |
| - |
297 | 303 | def _patch_accelerate_for_fsdp2_fix(self):
|
298 | 304 | """Fixes for accelerate prepare.
|
299 | 305 |
|
|
0 commit comments