
Commit 98ef9fb

Update: removed the per-module restore and state

Signed-off-by: Jingyu Xin <[email protected]>
1 parent 1bb3985 commit 98ef9fb

File tree

4 files changed, +32 -227 lines changed


modelopt/torch/peft/conversion.py

Lines changed: 4 additions & 78 deletions
@@ -17,13 +17,11 @@

 import fnmatch
 from collections.abc import Callable, Iterable
-from typing import Any

 import torch.nn as nn

-from modelopt.torch.opt.conversion import ApplyModeError, ModelLikeModule, ModeloptStateManager
+from modelopt.torch.opt.conversion import ModelLikeModule, ModeloptStateManager
 from modelopt.torch.opt.mode import ConvertReturnType, MetadataDict
-from modelopt.torch.utils import get_unwrapped_name

 from .config import PEFTConfig
 from .lora.layer import LoRAModule, LoRAModuleRegistry
@@ -34,7 +32,6 @@
     "replace_lora_module",
     "unfreeze_base_weights",
     "unfreeze_lora_weights",
-    "update_peft_metadata_in_model",
 ]


@@ -48,64 +45,17 @@ def convert_to_peft_model(model: ModelLikeModule, config: PEFTConfig) -> ConvertReturnType:
     metadata = {}
     add_adapter(model, config)
     update_grads(model, config)
-    update_peft_metadata(model, config, metadata)

     return model, metadata


 def restore_peft_model(
     model: ModelLikeModule, config: PEFTConfig, metadata: MetadataDict
 ) -> nn.Module:
-    convert_to_peft_model(model, config)
-    return restore_peft_state(model, metadata)
-
-
-def restore_peft_state(model: ModelLikeModule, metadata: MetadataDict):
-    """Restore PEFT state from metadata or extra_state.
-
-    For backward compatibility, we check metadata first. For distributed
-    checkpoints (NeMo-MCore), the state will be in extra_state of each LoRAModule
-    and will be restored automatically via set_extra_state() during load_state_dict().
-
-    Args:
-        model: Model with LoRA modules to restore
-        metadata: Metadata dictionary that may contain peft_state
-    Returns:
-        The model with restored PEFT state
-    """
-    if "peft_state" not in metadata:
-        # For distributed checkpoints (NeMo-MCore), peft_state is stored
-        # in each LoRAModule's extra_state and will be restored via
-        # set_extra_state() during load_state_dict()
-        return model
-
-    # Legacy path: restore from metadata
-    peft_state_dict = metadata["peft_state"]
-    for name, module in model.named_modules():
-        if isinstance(module, LoRAModule):
-            unwrapped_name = get_unwrapped_name(name)
-            if unwrapped_name in peft_state_dict:
-                try:
-                    module.set_from_peft_state(peft_state_dict[unwrapped_name])
-                except Exception as e:
-                    raise ApplyModeError(f"Failed to restore PEFT state for module {name}: {e}")
-
+    model, _ = convert_to_peft_model(model, config)
     return model


-def update_peft_metadata(model: nn.Module, config: PEFTConfig, metadata: MetadataDict) -> None:
-    """Update the PEFT/LoRA state in the metadata dict."""
-    metadata["peft_state"] = peft_state(model)
-
-
-def peft_state(model: nn.Module) -> dict[str, Any]:
-    return {
-        get_unwrapped_name(n): m.get_peft_state()
-        for n, m in model.named_modules()
-        if isinstance(m, LoRAModule)
-    }
-
-
 def replace_lora_module(
     model: nn.Module, version=None, config: PEFTConfig = None, registry=LoRAModuleRegistry
 ):
@@ -137,32 +87,8 @@ def _replace_lora_module(model: nn.Module, version=None, registry=LoRAModuleRegistry):
         _replace_lora_module(getattr(model, name), version=version, registry=registry)


-def update_peft_metadata_in_model(model: nn.Module) -> None:
-    """Update the PEFT metadata in the model's ModeloptStateManager.
-
-    This function should be called after manually modifying LoRA adapters to ensure
-    the metadata stored in the ModeloptStateManager reflects the current state.
-
-    Args:
-        model: Model with LoRA modules whose metadata needs updating
-    Example:
-        >>> # After manually adding/modifying adapters
-        >>> for module in model.modules():
-        ...     if isinstance(module, LoRAModule):
-        ...         module.update_layer_lora("custom_adapter", rank=32)
-        >>> # Update metadata to reflect changes
-        >>> update_peft_metadata_in_model(model)
-    """
-    # Check if model has ModeloptStateManager (has been converted with peft mode)
-    if not ModeloptStateManager.is_converted(model):
-        return
-
-    # Get the state manager
-    manager = ModeloptStateManager(model)
-
-    # Update the metadata with current PEFT state
-    if manager._state and manager._last_metadata is not None:
-        manager._last_metadata["peft_state"] = peft_state(model)
+def update_peft_metadata(model: nn.Module, config: PEFTConfig, metadata: MetadataDict) -> None:
+    """Placeholder for the metadata-related function; not needed in this mode."""


 def add_adapter(model, config: PEFTConfig):
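
Note on the simplified restore path: restore_peft_model now just re-runs the conversion, so the adapter structure comes from the PEFTConfig and the adapter weights travel through the regular state_dict load. A minimal caller-side sketch of that round trip follows; the helper names are illustrative, these entrypoints are normally driven by the modelopt mode framework rather than called directly, and passing a plain nn.Module here is an assumption.

import torch
import torch.nn as nn

from modelopt.torch.peft.config import PEFTConfig
from modelopt.torch.peft.conversion import convert_to_peft_model


def save_lora_checkpoint(model: nn.Module, path: str) -> None:
    # Only weights go into the checkpoint; the adapter layout is implied by
    # the PEFTConfig used at convert time, not by any per-module metadata.
    torch.save(model.state_dict(), path)


def restore_lora_model(base_model: nn.Module, config: PEFTConfig, path: str) -> nn.Module:
    # Re-run the conversion to recreate the LoRA modules from the config,
    # then load the trained adapter (and base) weights from the checkpoint.
    model, _ = convert_to_peft_model(base_model, config)
    model.load_state_dict(torch.load(path))
    return model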

modelopt/torch/peft/convert.py

Lines changed: 0 additions & 41 deletions
@@ -39,7 +39,6 @@
 __all__ = [
     "disable_adapters",
     "enable_adapters",
-    "get_adapter_states",
     "is_peft_model",
     "update_model",
 ]
@@ -192,46 +191,6 @@ def enable_adapters(model, layers_to_enable=None, adapters_to_enable=None):
        )


-def get_adapter_states(model):
-    """Get the current state of all adapters in the model.
-
-    Args:
-        model: Model with LoRA adapters
-
-    Returns:
-        Dict mapping module names to their adapter states
-
-    Example:
-        >>> states = get_adapter_states(model)
-        >>> print(states)
-        {
-            'transformer.layers.0.attention': {
-                'default': {'enabled': True, 'rank': 32},
-                'finetuned': {'enabled': False, 'rank': 64}
-            },
-            'transformer.layers.0.mlp': {
-                'default': {'enabled': True, 'rank': 32}
-            }
-        }
-    """
-    assert is_peft_model(model), "It's not a MO-PEFT model"
-
-    adapter_states = {}
-    for module_name, module in model.named_modules():
-        if isinstance(module, LoRAModule):
-            module_adapters = {}
-            for adapter_name, adapter_dict in module._lora_adapters.items():
-                module_adapters[adapter_name] = {
-                    "enabled": adapter_dict.get("enable", True),
-                    "rank": adapter_dict.get("rank", "unknown"),
-                    "scale": adapter_dict.get("scale", 1.0),
-                }
-            if module_adapters:
-                adapter_states[module_name] = module_adapters
-
-    return adapter_states
-
-
 def is_megatron_core_model(model) -> bool:
     if MEGATRON_LAYERS:
         for m in model.modules():
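
Note: with get_adapter_states removed from the public API, the same summary can still be assembled directly from the LoRA modules. A minimal sketch along the lines of the deleted helper, assuming _lora_adapters keeps the enable/rank/scale keys shown in the removed code above:

from modelopt.torch.peft.lora.layer import LoRAModule


def summarize_adapters(model):
    # Collect per-adapter settings from every LoRAModule in the model.
    summary = {}
    for name, module in model.named_modules():
        if isinstance(module, LoRAModule):
            summary[name] = {
                adapter: {
                    "enabled": cfg.get("enable", True),
                    "rank": cfg.get("rank"),
                    "scale": cfg.get("scale", 1.0),
                }
                for adapter, cfg in module._lora_adapters.items()
            }
    return summary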

modelopt/torch/peft/lora/layer.py

Lines changed: 5 additions & 103 deletions
@@ -1,7 +1,6 @@
11
"""LoRA (Low-Rank Adaptation) module implementation."""
22

33
import math
4-
import warnings
54
from abc import abstractmethod
65
from typing import Any
76

@@ -28,6 +27,11 @@ def get_init_methods(init_method: str = "kaiming_init"):
2827
) # LoRA A: Kaiming uniform
2928
elif init_method == "zero_init":
3029
return lambda weight: init.zeros_(weight) # LoRA B: zeros
30+
else:
31+
raise ValueError(
32+
f"Unsupported initialization method: '{init_method}'. "
33+
"Supported methods: 'kaiming_init', 'zero_init'"
34+
)
3135

3236

3337
class LoRAModule(DynamicModule):
@@ -98,108 +102,6 @@ def update_layer_lora(
98102
"""
99103
raise NotImplementedError("Subclasses must implement update_layer_lora")
100104

101-
def get_peft_state(self) -> dict[str, Any]:
102-
"""Get PEFT/LoRA state to be saved in checkpoint.
103-
104-
This method returns the configuration and state of all LoRA adapters
105-
without including the actual weight tensors.
106-
107-
Returns:
108-
Dictionary containing:
109-
- adapters: Dict mapping adapter names to their configuration
110-
"""
111-
modelopt_state = {}
112-
113-
# Store adapter configurations
114-
adapters_config = {}
115-
for adapter_name, adapter_modules in self._lora_adapters.items():
116-
lora_a = adapter_modules["lora_a"]
117-
lora_b = adapter_modules["lora_b"]
118-
119-
# Get explicitly stored rank for reliability
120-
rank = adapter_modules.get("rank", None)
121-
122-
# If rank is not stored (legacy case), try to infer it
123-
if rank is None:
124-
if hasattr(lora_a, "output_size"):
125-
rank = lora_a.output_size
126-
elif hasattr(lora_b, "input_size"):
127-
rank = lora_b.input_size
128-
elif hasattr(lora_a, "out_features"):
129-
rank = lora_a.out_features
130-
elif hasattr(lora_b, "in_features"):
131-
rank = lora_b.in_features
132-
133-
adapters_config[adapter_name] = {
134-
"rank": rank,
135-
"enable": adapter_modules.get("enable", True),
136-
"scale": adapter_modules.get("scale", 1.0),
137-
}
138-
139-
modelopt_state["adapters"] = adapters_config
140-
141-
return modelopt_state
142-
143-
def get_extra_state(self) -> dict[str, Any]:
144-
"""Get extra state for distributed checkpointing.
145-
146-
For distributed/sharded checkpoints (like NeMo-MCore), we store the PEFT state
147-
as extra_state instead of in metadata. This handles cases where module names
148-
change with different parallelism settings (TP, PP, EP).
149-
150-
Returns:
151-
Dictionary containing the PEFT/LoRA adapter state
152-
"""
153-
# Only return state if we have adapters
154-
if not self._lora_adapters:
155-
return {}
156-
157-
# Get the current PEFT state
158-
peft_state = self.get_peft_state()
159-
160-
return {"modelopt_peft_state": peft_state}
161-
162-
def set_from_peft_state(self, peft_state: dict[str, Any]) -> None:
163-
"""Restore LoRA adapters from saved PEFT state.
164-
165-
This method recreates LoRA adapters based on their saved configuration.
166-
Note: This only restores the adapter structure, not the weights.
167-
168-
Args:
169-
peft_state: Dictionary containing adapter configurations
170-
"""
171-
adapters_config = peft_state.get("adapters", {})
172-
173-
for adapter_name, config in adapters_config.items():
174-
if adapter_name not in self._lora_adapters:
175-
self.update_layer_lora(adapter_name, config)
176-
177-
def set_extra_state(self, state: dict[str, Any]) -> None:
178-
"""Restore extra state for distributed checkpointing.
179-
180-
This method is called during load_state_dict() to restore the PEFT/LoRA state
181-
from distributed checkpoints. It handles the adapter configuration but not
182-
the actual weights (which are restored through the normal state_dict mechanism).
183-
184-
Args:
185-
state: Dictionary containing the extra state to restore
186-
"""
187-
if state is None:
188-
return
189-
190-
peft_state = state.get("modelopt_peft_state")
191-
if peft_state is None:
192-
return
193-
194-
# Restore the PEFT state
195-
try:
196-
self.set_from_peft_state(peft_state)
197-
except Exception as e:
198-
warnings.warn(
199-
f"Failed to restore PEFT state from extra_state: {e}. "
200-
"This might happen if the model structure has changed."
201-
)
202-
203105
def forward(self, x: torch.Tensor, *args, **kwargs) -> Any:
204106
"""Forward pass with LoRA adaptation.
205107
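
Note: the new else branch makes get_init_methods fail loudly instead of silently returning None for an unknown method name. A quick sketch of the resulting behavior, assuming the function is importable from the module as shown:

from modelopt.torch.peft.lora.layer import get_init_methods

init_a = get_init_methods("kaiming_init")  # Kaiming-uniform init for LoRA A
init_b = get_init_methods("zero_init")     # zero init for LoRA B

try:
    get_init_methods("xavier_init")        # unsupported name now raises
except ValueError as err:
    print(err)  # Unsupported initialization method: 'xavier_init'. ...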

modelopt/torch/peft/mode.py

Lines changed: 23 additions & 5 deletions
@@ -1,51 +1,69 @@
1+
"""PEFT mode descriptors for model optimization."""
2+
13
from modelopt.torch.opt.config import ModeloptBaseConfig
24
from modelopt.torch.opt.mode import (
35
ConvertEntrypoint,
4-
ConvertReturnType,
5-
ModeConfigList,
66
ModeDescriptor,
77
RestoreEntrypoint,
88
UpdateEntrypoint,
99
_ModeRegistryCls,
1010
)
11-
from .config import PEFTConfig, ExportPEFTConfig
12-
from .conversion import convert_to_peft_model, restore_peft_model, update_peft_metadata, export_peft_model, restore_export_peft_model
11+
12+
from .config import ExportPEFTConfig, PEFTConfig
13+
from .conversion import (
14+
convert_to_peft_model,
15+
export_peft_model,
16+
restore_export_peft_model,
17+
restore_peft_model,
18+
update_peft_metadata,
19+
)
1320

1421
PEFTModeRegistry = _ModeRegistryCls("PEFT")
1522

23+
1624
@PEFTModeRegistry.register_mode
1725
class PEFTModeDescriptor(ModeDescriptor):
26+
"""Mode descriptor for PEFT (Parameter-Efficient Fine-Tuning) mode."""
27+
1828
@property
1929
def name(self) -> str:
30+
"""Returns the value (str representation) of the mode."""
2031
return "peft"
2132

2233
@property
2334
def config_class(self) -> type[ModeloptBaseConfig]:
35+
"""Specifies the config class for the mode."""
2436
return PEFTConfig
2537

2638
@property
2739
def export_mode(self) -> str | None:
40+
"""Specifies the export mode name for this mode."""
2841
return "export_peft"
2942

3043
@property
3144
def convert(self) -> ConvertEntrypoint:
45+
"""The mode's entrypoint for converting a model."""
3246
return convert_to_peft_model
3347

3448
@property
3549
def restore(self) -> RestoreEntrypoint:
50+
"""The mode's entrypoint for restoring a model."""
3651
return restore_peft_model
3752

3853
@property
3954
def update_for_save(self) -> UpdateEntrypoint:
55+
"""The mode's entrypoint for updating the model's state before saving."""
4056
return update_peft_metadata
4157

4258
@property
4359
def update_for_new_mode(self) -> UpdateEntrypoint:
4460
"""The mode's entrypoint for updating the models state before new mode."""
4561
return update_peft_metadata
4662

63+
4764
@PEFTModeRegistry.register_mode
4865
class ExportPEFTModeDescriptor(ModeDescriptor):
66+
"""Mode descriptor for exporting PEFT models."""
4967

5068
@property
5169
def name(self) -> str:
@@ -70,4 +88,4 @@ def convert(self) -> ConvertEntrypoint:
7088
@property
7189
def restore(self) -> RestoreEntrypoint:
7290
"""The mode's entrypoint for restoring a model."""
73-
return restore_export_peft_model
91+
return restore_export_peft_model
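
Note on how these descriptors are exercised: entering the "peft" mode dispatches to convert_to_peft_model, saving calls update_peft_metadata (now a no-op placeholder), and reloading a checkpoint calls restore_peft_model. A hedged usage sketch follows; the update_model re-export and the config fields below are assumptions, not shown in this diff.

import torch.nn as nn

import modelopt.torch.peft as mtpf  # assumption: the package re-exports update_model

model = nn.Sequential(nn.Linear(16, 16), nn.ReLU(), nn.Linear(16, 16))

# Hypothetical adapter config; the real PEFTConfig fields may differ.
lora_config = {"adapter_name": "default", "rank": 32}

# Entering the "peft" mode runs PEFTModeDescriptor.convert (convert_to_peft_model);
# restore_peft_model runs later when a saved modelopt state is reloaded.
model = mtpf.update_model(model, lora_config)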

0 commit comments
