
Commit ca9698d

Update more, config + conversion
Signed-off-by: Jingyu Xin <[email protected]>
1 parent 9b006f9 commit ca9698d

5 files changed: +219 -111 lines changed

modelopt/torch/peft/config.py

Lines changed: 106 additions & 4 deletions
@@ -15,30 +15,132 @@
 
 """Configuration classes for PEFT methods."""
 
+import math
+from collections.abc import Callable
+from collections.abc import Callable as CallableType
+
+import torch.nn.init as init
+from pydantic import field_validator, model_validator
+
 from modelopt.torch.opt.config import ModeloptBaseConfig, ModeloptField
 
+__all__ = ["ExportPEFTConfig", "PEFTAttributeConfig", "PEFTConfig"]
+
+
+class PEFTAttributeConfig(ModeloptBaseConfig):
+    """Configuration for PEFT adapter attributes."""
+
+    enable: bool = ModeloptField(
+        default=True,
+        title="Enable adapter",
+        description="If True, enables the adapter. If False, bypasses the adapter.",
+    )
+
+    rank: int = ModeloptField(
+        default=64,
+        title="LoRA rank",
+        description=(
+            "The rank (dimension) of the LoRA matrices. "
+            "Higher rank allows more expressiveness but uses more memory."
+        ),
+    )
+
+    scale: float = ModeloptField(
+        default=1.0,
+        title="LoRA scaling factor",
+        description="Scaling factor for the LoRA output. Controls the magnitude of the adaptation.",
+    )
+
+    lora_a_init: Callable[[object], None] | None = ModeloptField(
+        default=lambda weight: init.kaiming_uniform_(weight, a=math.sqrt(5)),
+        title="LoRA A matrix initializer",
+        description="Custom initialization function for LoRA A matrix. Defaults to Kaiming uniform initialization.",
+    )
+
+    lora_b_init: Callable[[object], None] | None = ModeloptField(
+        default=lambda weight: init.zeros_(weight),
+        title="LoRA B matrix initializer",
+        description="Custom initialization function for LoRA B matrix. Defaults to zero initialization.",
+    )
+
+    @field_validator("rank")
+    @classmethod
+    def validate_rank(cls, v):
+        """Validate rank is positive."""
+        if v < 1:
+            raise ValueError("rank must be a positive integer")
+        return v
+
+    @field_validator("scale")
+    @classmethod
+    def validate_scale(cls, v):
+        """Validate scale is positive."""
+        if v <= 0:
+            raise ValueError("scale must be a positive number")
+        return v
+
+    @model_validator(mode="after")
+    def validate_init_functions(self):
+        """Validate initialization functions are callable."""
+        if self.lora_a_init is not None and not callable(self.lora_a_init):
+            raise ValueError("lora_a_init must be callable")
+        if self.lora_b_init is not None and not callable(self.lora_b_init):
+            raise ValueError("lora_b_init must be callable")
+        return self
+
+
+# Type alias for adapter configuration
+PEFTAdapterCfgType = dict[str | CallableType, PEFTAttributeConfig | dict]
+
 
 class PEFTConfig(ModeloptBaseConfig):
     """Default configuration for ``peft`` mode."""
 
     adapter_name: str = ModeloptField(
         default="default",
-        title="Placeholder",
+        title="Adapter name",
+        description="Name of the adapter to create or update.",
         validate_default=True,
     )
 
-    adapter_cfg: dict = ModeloptField(
+    adapter_cfg: PEFTAdapterCfgType = ModeloptField(
         default={"default": {"rank": 128}},
-        title="Placeholder",
+        title="Adapter configuration",
+        description="Configuration for adapters. Maps module patterns to PEFTAttributeConfig or dict.",
         validate_default=True,
     )
 
     adapter_type: str = ModeloptField(
         default="lora",
-        title="Placeholder",
+        title="Adapter type",
+        description="Type of PEFT adapter to use. Currently only 'lora' is supported.",
         validate_default=True,
     )
 
+    @field_validator("adapter_type")
+    @classmethod
+    def validate_adapter_type(cls, v):
+        """Validate adapter type."""
+        if v not in ["lora"]:
+            raise ValueError(f"Unsupported adapter type: {v}. Only 'lora' is currently supported.")
+        return v
+
+    @field_validator("adapter_cfg")
+    @classmethod
+    def validate_adapter_cfg(cls, v):
+        """Validate and convert adapter configurations."""
+        validated_cfg = {}
+        for key, value in v.items():
+            if isinstance(value, dict) and not isinstance(value, PEFTAttributeConfig):
+                # Convert dict to PEFTAttributeConfig to trigger validation
+                try:
+                    validated_cfg[key] = PEFTAttributeConfig(**value)
+                except Exception as e:
+                    raise ValueError(f"Invalid adapter configuration for '{key}': {e}")
+            else:
+                validated_cfg[key] = value
+        return validated_cfg
+
 
 class ExportPEFTConfig(ModeloptBaseConfig):
     """An empty config."""

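A quick sanity check on what the new config classes buy: adapter settings become validated objects instead of bare dicts. This is an illustrative sketch, not part of the commit; it assumes the import path shown in this diff, and the adapter name and module patterns are made up.

from modelopt.torch.peft.config import PEFTAttributeConfig, PEFTConfig

# Plain dicts in adapter_cfg are coerced into PEFTAttributeConfig by validate_adapter_cfg,
# so bad values surface when the config is built rather than deep inside conversion.
cfg = PEFTConfig(
    adapter_name="demo_adapter",                      # hypothetical name
    adapter_type="lora",
    adapter_cfg={
        "*self_attention*": {"rank": 32, "scale": 0.5},   # plain dict, gets coerced
        "*mlp*": PEFTAttributeConfig(rank=16),            # already a validated object
    },
)
print(type(cfg.adapter_cfg["*self_attention*"]))  # expected: PEFTAttributeConfig

# Out-of-range values are rejected by the field validators.
try:
    PEFTAttributeConfig(rank=0)
except ValueError as err:  # pydantic's ValidationError subclasses ValueError
    print("rejected:", err)
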
modelopt/torch/peft/conversion.py

Lines changed: 59 additions & 0 deletions
@@ -15,6 +15,7 @@
 
 """PEFT conversion and restore utilities for LoRA modules."""
 
+import fnmatch
 from typing import Any
 
 import torch.nn as nn
@@ -41,6 +42,7 @@ def convert_to_peft_model(model: ModelLikeModule, config: PEFTConfig) -> Convert
     replace_lora_module(model, version=ModeloptStateManager(model).state_version, config=config)
 
     metadata = {}
+    add_adapter(model, config)
     # Should return adapaters, active_adapters
     update_peft_metadata(model, config, metadata)
 
@@ -157,3 +159,60 @@ def update_peft_metadata_in_model(model: nn.Module) -> None:
     # Update the metadata with current PEFT state
     if manager._state and manager._last_metadata is not None:
         manager._last_metadata["peft_state"] = peft_state(model)
+
+
+def add_adapter(model, config: PEFTConfig):
+    """Add a new LoRA adapter to the model.
+
+    Args:
+        model: Model with LoRA modules to add adapters to
+        config: PEFTConfig instance containing adapter_cfg and adapter_name
+
+    Returns:
+        The model with the new adapter added
+    """
+    adapter_cfg = config.adapter_cfg
+    adapter_name = config.adapter_name
+
+    for name, module in model.named_modules():
+        if isinstance(module, LoRAModule):
+            for wildcard_or_filter_func, adapter_setting in adapter_cfg.items():
+                if isinstance(wildcard_or_filter_func, str):
+                    if not fnmatch.fnmatch(name, wildcard_or_filter_func):
+                        continue
+                elif callable(wildcard_or_filter_func):
+                    if not wildcard_or_filter_func(name):
+                        continue
+                else:
+                    raise NotImplementedError(f"Unsupported type {type(wildcard_or_filter_func)}")
+                if adapter_setting.enable:  # type: ignore[union-attr]
+                    module.update_layer_lora(
+                        adapter_name,
+                        adapter_setting,
+                    )
+
+    _update_peft_metadata_in_state(model)
+    return model
+
+
+def _update_peft_metadata_in_state(model: nn.Module) -> None:
+    """Update the PEFT metadata in the ModeloptStateManager.
+
+    This function updates the metadata to reflect the current state of LoRA adapters
+    after they have been added or modified.
+    """
+    if not ModeloptStateManager.is_converted(model):
+        return
+
+    manager = ModeloptStateManager(model)
+
+    current_peft_state = {}
+    for name, module in model.named_modules():
+        if isinstance(module, LoRAModule):
+            from modelopt.torch.utils import get_unwrapped_name
+
+            unwrapped_name = get_unwrapped_name(name)
+            current_peft_state[unwrapped_name] = module.get_peft_state()
+
+    if manager._state and manager._last_metadata is not None:
+        manager._last_metadata["peft_state"] = current_peft_state

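The new add_adapter walks model.named_modules() and matches each LoRAModule name against the adapter_cfg keys, which may be fnmatch-style wildcards or callables. The standalone sketch below shows only that matching rule; the module names are invented for illustration.

import fnmatch

adapter_cfg = {
    "*self_attention*": {"rank": 32},                          # wildcard pattern
    (lambda name: name.endswith(".mlp.fc1")): {"rank": 8},     # callable filter
}

def matches(name, key):
    if isinstance(key, str):
        return fnmatch.fnmatch(name, key)
    if callable(key):
        return bool(key(name))
    raise NotImplementedError(f"Unsupported type {type(key)}")

for name in ["decoder.layers.0.self_attention.linear_qkv", "decoder.layers.0.mlp.fc1"]:
    hits = [setting for key, setting in adapter_cfg.items() if matches(name, key)]
    print(name, "->", hits)
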
modelopt/torch/peft/convert.py

Lines changed: 9 additions & 64 deletions
@@ -15,22 +15,21 @@
 
 """User-facing PEFT API for LoRA module conversion and adapter management."""
 
-import fnmatch
 from typing import Any
 
 import torch.nn as nn
 
 from modelopt.torch.opt import apply_mode
-from modelopt.torch.opt.conversion import ModeloptStateManager
 from modelopt.torch.peft.config import PEFTConfig
+from modelopt.torch.peft.conversion import add_adapter
 
 from .lora.layer import LoRAModule
 from .mode import PEFTModeRegistry
 
 
 def update_model(
     model: nn.Module,
-    config: dict[str, Any | PEFTConfig],
+    config: dict[str, Any] | PEFTConfig,
 ):
     """Update model with PEFT/LoRA adapters.
 
@@ -40,78 +39,24 @@ def update_model(
 
     Args:
         model: The model to update
-        config: PEFT configuration containing adapter settings
+        config: PEFT configuration dict or PEFTConfig instance
 
     Returns:
         The updated model with LoRA adapters
     """
+    # Validate config by converting to PEFTConfig if needed
+
     # Check if model is already in PEFT mode by looking for LoRA modules
     if not is_peft_model(model):
         # First time - need to convert to PEFT mode
         apply_mode(model, mode=[("peft", config)], registry=PEFTModeRegistry)
-    return add_adapter(model, config)
-
-
-def add_adapter(model, config):
-    """Add a new LoRA adapter to the model.
-
-    Args:
-        model: Model with LoRA modules to add adapters to
-        config: Configuration dict containing adapter_cfg and adapter_name
-
-    Returns:
-        The model with the new adapter added
-    """
-    adapter_cfg = config["adapter_cfg"]
-    adapter_name = config["adapter_name"]
-
-    for name, module in model.named_modules():
-        if isinstance(module, LoRAModule):
-            for wildcard_or_filter_func, adapter_setting in adapter_cfg.items():
-                if isinstance(wildcard_or_filter_func, str):
-                    if not fnmatch.fnmatch(name, wildcard_or_filter_func):
-                        continue
-                elif callable(wildcard_or_filter_func):
-                    if not wildcard_or_filter_func(name):
-                        continue
-                else:
-                    raise NotImplementedError(f"Unsupported type {type(wildcard_or_filter_func)}")
-                module.update_layer_lora(
-                    adapter_name, adapter_setting["rank"], adapter_setting.get("scale", 1.0)
-                )
-
-    # Update the metadata in ModeloptStateManager after adding adapters
-    _update_peft_metadata_in_state(model)
+    else:
+        if not isinstance(config, PEFTConfig):
+            config = PEFTConfig(**config)
+        add_adapter(model, config)
     return model
 
 
-def _update_peft_metadata_in_state(model: nn.Module) -> None:
-    """Update the PEFT metadata in the ModeloptStateManager.
-
-    This function updates the metadata to reflect the current state of LoRA adapters
-    after they have been added or modified.
-    """
-    # Check if model has ModeloptStateManager (has been converted with peft mode)
-    if not ModeloptStateManager.is_converted(model):
-        return
-
-    # Get the state manager
-    manager = ModeloptStateManager(model)
-
-    # Get current PEFT state from all LoRA modules
-    current_peft_state = {}
-    for name, module in model.named_modules():
-        if isinstance(module, LoRAModule):
-            from modelopt.torch.utils import get_unwrapped_name
-
-            unwrapped_name = get_unwrapped_name(name)
-            current_peft_state[unwrapped_name] = module.get_peft_state()
-
-    # Update the metadata in the last mode state (which should be 'peft')
-    if manager._state and manager._last_metadata is not None:
-        manager._last_metadata["peft_state"] = current_peft_state
-
-
 def is_peft_model(model: nn.Module) -> bool:
     """Check if the model has been converted to PEFT/LoRA model.
 

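With this change update_model() converts the model into PEFT mode on first use and, on later calls, validates the dict into a PEFTConfig and delegates to conversion.add_adapter. A hedged usage sketch follows; the import path is inferred from the file layout in this commit, the layer choice is arbitrary, and which submodules actually receive adapters depends on what is registered in LoRAModuleRegistry.

import torch.nn as nn

from modelopt.torch.peft.convert import update_model  # path inferred from this commit

# Any nn.Module works as a stand-in here; supported layer types are determined
# by LoRAModuleRegistry, so treat this as a sketch rather than a recipe.
model = nn.TransformerEncoderLayer(d_model=64, nhead=4)

lora_cfg = {
    "adapter_name": "finetune_v1",
    "adapter_type": "lora",
    "adapter_cfg": {"*linear*": {"rank": 64, "scale": 1.0}},
}

# First call: enters "peft" mode (LoRA module replacement) and adds "finetune_v1".
model = update_model(model, lora_cfg)

# Later call: the model is already a PEFT model, so the dict is coerced to PEFTConfig
# and add_adapter() attaches a second adapter in place.
model = update_model(model, {**lora_cfg, "adapter_name": "finetune_v2"})
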
modelopt/torch/peft/lora/layer.py

Lines changed: 16 additions & 15 deletions
@@ -9,6 +9,8 @@
 
 from modelopt.torch.opt.dynamic import DynamicModule, _DMRegistryCls
 
+from ..config import PEFTAttributeConfig
+
 __all__ = [
     "LoRAModule",
     "LoRAModuleRegistry",
@@ -100,7 +102,11 @@ def _register_adapter(
         self.activate_adapter(adapter_name)
 
     @abstractmethod
-    def update_layer_lora(self, adapter_name: str, rank: int = 64, scale: float = 1.0) -> None:
+    def update_layer_lora(
+        self,
+        adapter_name: str,
+        attr_config: PEFTAttributeConfig,
+    ) -> None:
         """Create and register a new LoRA adapter.
 
         This method must be implemented by subclasses to create the appropriate
@@ -110,6 +116,8 @@ def update_layer_lora(self, adapter_name: str, rank: int = 64, scale: float = 1.
            adapter_name: Name for the new adapter
            rank: Rank of the LoRA decomposition (default: 64)
            scale: Scale factor for the LoRA output (default: 1.0)
+           lora_a_init: Optional initialization function for LoRA A matrix
+           lora_b_init: Optional initialization function for LoRA B matrix
        """
        raise NotImplementedError("Subclasses must implement update_layer_lora")
 
@@ -189,24 +197,17 @@ def set_from_peft_state(self, peft_state: dict[str, Any]) -> None:
        """
        adapters_config = peft_state.get("adapters", {})
 
-        # Clear existing adapters first
        self._lora_adapters.clear()
        self._active_adapters.clear()
 
-        # Recreate each adapter based on saved configuration
        for adapter_name, config in adapters_config.items():
-            rank = config.get("rank")
-            scale = config.get("scale", 1.0)
-
-            if rank is not None:
-                # Create the adapter with saved configuration
-                self.update_layer_lora(adapter_name, rank=rank, scale=scale)
+            self.update_layer_lora(adapter_name, config)
 
-                # Set activation state
-                if config.get("is_active", False):
-                    self.activate_adapter(adapter_name)
-                else:
-                    self.deactivate_adapter(adapter_name)
+            # Set activation state
+            if config.get("is_active", False):
+                self.activate_adapter(adapter_name)
+            else:
+                self.deactivate_adapter(adapter_name)
 
    def set_extra_state(self, state: dict[str, Any]) -> None:
        """Restore extra state for distributed checkpointing.
@@ -281,7 +282,7 @@ def forward(self, x: torch.Tensor, *args, **kwargs) -> Any:
 
        # Return output in the same format as the base layer
        if other_outputs:
-            return (result,) + other_outputs
+            return (result, *other_outputs)
        else:
            return result
 

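The abstract update_layer_lora() now receives one PEFTAttributeConfig instead of separate rank/scale arguments, which also lets subclasses honor the configured initializers. Below is a self-contained toy sketch of what a concrete implementation could look like; it is not the repository's LoRA layer, and the internal storage shown here is an assumption made for illustration.

import torch.nn as nn

class ToyLoRALinear(nn.Module):
    """Toy stand-in for a LoRAModule subclass wrapping an nn.Linear base layer."""

    def __init__(self, base: nn.Linear):
        super().__init__()
        self.base = base
        self._lora_adapters = nn.ModuleDict()   # adapter name -> ModuleDict with "a" and "b"
        self._scales = {}                       # adapter name -> scale factor
        self._active_adapters = set()

    def update_layer_lora(self, adapter_name, attr_config):
        out_features, in_features = self.base.weight.shape
        lora_a = nn.Linear(in_features, attr_config.rank, bias=False)
        lora_b = nn.Linear(attr_config.rank, out_features, bias=False)
        # The config carries its own initializers (Kaiming-uniform A, zero B by default).
        if attr_config.lora_a_init is not None:
            attr_config.lora_a_init(lora_a.weight)
        if attr_config.lora_b_init is not None:
            attr_config.lora_b_init(lora_b.weight)
        self._lora_adapters[adapter_name] = nn.ModuleDict({"a": lora_a, "b": lora_b})
        self._scales[adapter_name] = attr_config.scale
        if attr_config.enable:
            self._active_adapters.add(adapter_name)

    def forward(self, x):
        result = self.base(x)
        for name in self._active_adapters:
            pair = self._lora_adapters[name]
            result = result + self._scales[name] * pair["b"](pair["a"](x))
        return result

Because the default B initializer is zeros, a freshly added adapter leaves the output identical to the base layer until training moves the LoRA weights.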