
Commit 83a7c11

yaoyu-33 and HollowMan6 authored

Peft Bridge (#1766)

Signed-off-by: yaoyu-33 <[email protected]>
Signed-off-by: Yu Yao <[email protected]>
Signed-off-by: Hollow Man <[email protected]>
Co-authored-by: ℍ𝕠𝕝𝕝𝕠𝕨 𝕄𝕒𝕟 <[email protected]>
1 parent 0596c92 · commit 83a7c11

File tree

13 files changed: +2066 −584 lines changed


examples/conversion/stream_adapter_weights.py

Lines changed: 509 additions & 0 deletions
Large diffs are not rendered by default.
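The 509-line example script itself is not rendered above. As a rough sketch of the kind of driver it likely contains, built only on the export_adapter_weights API added later in this commit (the import path, bridge type, and model setup below are assumptions, not taken from this diff):

    from megatron.bridge import AutoBridge  # import path assumed

    def stream_adapters(bridge: "AutoBridge", model) -> None:
        """Print each LoRA adapter tensor the bridge yields (hedged sketch)."""
        # export_adapter_weights streams (param_name, weight_tensor) tuples
        # without merging the adapters into the base weights.
        for name, tensor in bridge.export_adapter_weights(model, cpu=True, show_progress=True):
            print(name, tuple(tensor.shape))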

src/megatron/bridge/models/conversion/auto_bridge.py

Lines changed: 54 additions & 3 deletions
@@ -331,6 +331,7 @@ def export_hf_weights(
         cpu: bool = False,
         show_progress: bool = True,
         conversion_tasks: Optional[List[WeightConversionTask]] = None,
+        merge_adapter_weights: bool = True,
     ) -> Iterable["HFWeightTuple"]:
         """
         Export Megatron model weights to HuggingFace format.
@@ -352,6 +353,8 @@ def export_hf_weights(
                 *Please note that this is an advanced feature and should be used with caution.
                 The tasks needs to be built with the `get_conversion_tasks` method first and
                 carefully adjust based on your needs.*
+            merge_adapter_weights: Whether to gather and merge LoRA adapter weights into the base
+                tensors during export (defaults to True). Set to False to export only the base tensors.
 
 
         Yields:
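In effect, export_hf_weights can now produce either a merged or a base-only stream. A minimal sketch, assuming bridge and model are already set up and that model is the first positional argument, as in the sibling methods below:

    # Default: LoRA adapters are gathered and merged into the base tensors.
    merged = bridge.export_hf_weights(model, cpu=True)

    # Base-only export: adapters are left out of the yielded tensors.
    base_only = bridge.export_hf_weights(model, cpu=True, merge_adapter_weights=False)

    for name, tensor in base_only:
        ...  # consume (param_name, weight_tensor) tuples lazily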
@@ -376,6 +379,35 @@ def export_hf_weights(
             cpu=cpu,
             show_progress=show_progress,
             conversion_tasks=conversion_tasks,
+            merge_adapter_weights=merge_adapter_weights,
+        )
+
+    def export_adapter_weights(
+        self,
+        model: list[MegatronModelT],
+        cpu: bool = True,
+        show_progress: bool = True,
+    ) -> Iterable["HFWeightTuple"]:
+        """
+        Export only adapter weights from a Megatron model without merging them into base tensors.
+
+        This is useful when you want to save or inspect LoRA adapters independently from the
+        underlying pretrained weights.
+
+        Args:
+            model: Megatron model instance or list of instances
+            cpu: Whether to move tensors to CPU before yielding
+            show_progress: Display progress bar during export
+
+        Yields:
+            HFWeightTuple: Named tuples of (param_name, weight_tensor) for adapter parameters
+        """
+        dispatch_instance = (self._causal_lm_architecture, self._get_model_instance(model))
+        return model_bridge.stream_adapter_weights_megatron_to_hf(
+            dispatch_instance,
+            model,
+            cpu=cpu,
+            show_progress=show_progress,
         )
 
     def save_hf_pretrained(
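A short usage sketch for the new method; writing the stream to a safetensors file is illustrative and not part of this commit:

    from safetensors.torch import save_file

    # The yielded HFWeightTuples are (param_name, weight_tensor) pairs,
    # so they can be materialized directly into a state dict.
    adapter_state = dict(bridge.export_adapter_weights(model, cpu=True))

    # Persist the adapters separately from the base checkpoint (illustrative).
    save_file(adapter_state, "adapter_model.safetensors")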
@@ -385,6 +417,7 @@ def save_hf_pretrained(
         show_progress: bool = True,
         source_path: Optional[Union[str, Path]] = None,
         strict: bool = True,
+        merge_adapter_weights: bool = True,
     ) -> None:
         """
         Save a Megatron model in HuggingFace format.
@@ -410,6 +443,7 @@ def save_hf_pretrained(
                 HuggingFace model with custom modeling files needs to be referenced. If not specified,
                 the path will be automatically determined from the HuggingFace configuration.
             strict: Whether to perform strict validation during weight export
+            merge_adapter_weights: Whether to gather/merge LoRA adapter weights into base tensors during export.
 
 
         Example:
@@ -433,10 +467,21 @@ def save_hf_pretrained(
             # No distributed training, save artifacts
             self.hf_pretrained.save_artifacts(path, original_source_path=source_path)
 
-        self.save_hf_weights(model, path, show_progress, strict)
+        self.save_hf_weights(
+            model,
+            path,
+            show_progress,
+            strict,
+            merge_adapter_weights=merge_adapter_weights,
+        )
 
     def save_hf_weights(
-        self, model: list[MegatronModelT], path: str | Path, show_progress: bool = True, strict: bool = True
+        self,
+        model: list[MegatronModelT],
+        path: str | Path,
+        show_progress: bool = True,
+        strict: bool = True,
+        merge_adapter_weights: bool = True,
     ) -> None:
         """
         Save Megatron model weights in HuggingFace safetensors format.
@@ -457,6 +502,7 @@ def save_hf_weights(
             model: Megatron model instance or list of instances
             path: Directory path where weight files will be saved
             show_progress: Display progress bar during export
+            merge_adapter_weights: Whether to gather/merge LoRA adapter weights into base tensors during export.
 
         Raises:
             ValueError: If the state source doesn't support streaming save
@@ -478,7 +524,12 @@ def save_hf_weights(
             dist.barrier()
         dispatch_instance = (self._causal_lm_architecture, self._get_model_instance(model))
         generator = model_bridge.stream_weights_megatron_to_hf(
-            dispatch_instance, model, self.hf_pretrained, cpu=True, show_progress=show_progress
+            dispatch_instance,
+            model,
+            self.hf_pretrained,
+            cpu=True,
+            show_progress=show_progress,
+            merge_adapter_weights=merge_adapter_weights,
         )
 
         # Check if the state source is SafeTensorsStateSource for streaming save.
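Taken together, the flag now flows from save_hf_pretrained through save_hf_weights into the Megatron-to-HF streaming call. A hedged sketch of both save modes (the model/path positional arguments are assumed from the surrounding signatures):

    # Default: gather LoRA adapters and merge them into the exported base tensors.
    bridge.save_hf_pretrained(model, "./checkpoint-merged")

    # Opt out of merging, e.g. to pair the base weights with adapters
    # saved separately via export_adapter_weights().
    bridge.save_hf_pretrained(model, "./checkpoint-base-only", merge_adapter_weights=False)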
