
Commit 1d38784 (parent: 019efb0)

Update restore logic

Signed-off-by: Jingyu Xin <[email protected]>

2 files changed: +35, -15 lines

modelopt/torch/peft/config.py

Lines changed: 33 additions & 5 deletions
@@ -16,8 +16,8 @@
 """Configuration classes for PEFT methods."""
 
 import math
+import pickle  # nosec B403 - Only checking picklability
 from collections.abc import Callable
-from collections.abc import Callable as CallableType
 
 import torch.nn.init as init
 from pydantic import field_validator, model_validator
@@ -27,6 +27,16 @@
 __all__ = ["ExportPEFTConfig", "PEFTAttributeConfig", "PEFTConfig"]
 
 
+def default_lora_a_init(weight):
+    """Default initialization for LoRA A matrix using Kaiming uniform."""
+    return init.kaiming_uniform_(weight, a=math.sqrt(5))
+
+
+def default_lora_b_init(weight):
+    """Default initialization for LoRA B matrix using zeros."""
+    return init.zeros_(weight)
+
+
 class PEFTAttributeConfig(ModeloptBaseConfig):
     """Configuration for PEFT adapter attributes."""
 
@@ -52,13 +62,13 @@ class PEFTAttributeConfig(ModeloptBaseConfig):
     )
 
     lora_a_init: Callable[[object], None] | None = ModeloptField(
-        default=lambda weight: init.kaiming_uniform_(weight, a=math.sqrt(5)),
+        default=default_lora_a_init,
         title="LoRA A matrix initializer",
         description="Custom initialization function for LoRA A matrix. Default to Kaiming uniform initialization.",
     )
 
     lora_b_init: Callable[[object], None] | None = ModeloptField(
-        default=lambda weight: init.zeros_(weight),
+        default=default_lora_b_init,
         title="LoRA B matrix initializer",
         description="Custom initialization function for LoRA B matrix. Default to zero initialization.",
     )
@@ -81,16 +91,34 @@ def validate_scale(cls, v):
 
     @model_validator(mode="after")
     def validate_init_functions(self):
-        """Validate initialization functions are callable."""
+        """Validate initialization functions are callable and picklable."""
         if self.lora_a_init is not None and not callable(self.lora_a_init):
             raise ValueError("lora_a_init must be callable")
         if self.lora_b_init is not None and not callable(self.lora_b_init):
             raise ValueError("lora_b_init must be callable")
+        if self.lora_a_init is not None:
+            try:
+                _del = pickle.dumps(self.lora_a_init)
+                del _del
+            except (pickle.PicklingError, TypeError, AttributeError) as e:
+                raise ValueError(
+                    f"lora_a_init cannot be pickled: {e}. "
+                    "Please use a module-level function instead of a lambda or nested function."
+                )
+        if self.lora_b_init is not None:
+            try:
+                _del = pickle.dumps(self.lora_b_init)
+                del _del
+            except (pickle.PicklingError, TypeError, AttributeError) as e:
+                raise ValueError(
+                    f"lora_b_init cannot be pickled: {e}. "
+                    "Please use a module-level function instead of a lambda or nested function."
+                )
         return self
 
 
 # Type alias for adapter configuration
-PEFTAdapterCfgType = dict[str | CallableType, PEFTAttributeConfig | dict]
+PEFTAdapterCfgType = dict[str | Callable, PEFTAttributeConfig | dict]
 
 
 class PEFTConfig(ModeloptBaseConfig):
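The config.py changes above follow a standard Python constraint: pickle serializes a function by its importable qualified name, so the old lambda defaults and other nested functions cannot round-trip through a pickled config, while the new module-level default_lora_a_init / default_lora_b_init can. A minimal standalone sketch of the behavior the new pickle.dumps probe in validate_init_functions guards against (illustration only, not code from the repository):

import math
import pickle

import torch.nn.init as init


def default_lora_a_init(weight):
    # Module-level function: pickle can store it by qualified name.
    return init.kaiming_uniform_(weight, a=math.sqrt(5))


# A module-level function pickles fine ...
pickle.dumps(default_lora_a_init)

# ... while a lambda does not, because there is no importable name
# for pickle to look up when the config is deserialized.
try:
    pickle.dumps(lambda weight: init.zeros_(weight))
except (pickle.PicklingError, TypeError, AttributeError) as e:
    print(f"lambda is not picklable: {e}")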

modelopt/torch/peft/lora/layer.py

Lines changed: 2 additions & 10 deletions
@@ -165,17 +165,9 @@ def set_from_peft_state(self, peft_state: dict[str, Any]) -> None:
         """
         adapters_config = peft_state.get("adapters", {})
 
-        self._lora_adapters.clear()
-        self._active_adapters.clear()
-
         for adapter_name, config in adapters_config.items():
-            self.update_layer_lora(adapter_name, config)
-
-            # Set activation state
-            if config.get("is_active", False):
-                self.activate_adapter(adapter_name)
-            else:
-                self.deactivate_adapter(adapter_name)
+            if adapter_name not in self._lora_adapters:
+                self.update_layer_lora(adapter_name, config)
 
     def set_extra_state(self, state: dict[str, Any]) -> None:
         """Restore extra state for distributed checkpointing.
