
Commit 63e64c5

Add more functions
Signed-off-by: Jingyu Xin <[email protected]>
1 parent 3c17447 commit 63e64c5

File tree

7 files changed: +388 -91 lines


modelopt/torch/peft/config.py

Lines changed: 8 additions & 3 deletions

@@ -13,16 +13,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from collections.abc import Callable
 from typing import Literal
 
 from pydantic import ValidationInfo, field_validator, model_validator
 
 from modelopt.torch.opt.config import ModeloptBaseConfig, ModeloptField
 from modelopt.torch.utils.network import ConstructorLike
+
 BiasType = Literal["static", "dynamic"]
 BiasMethod = Literal["mean", "max_min"]
 
+
 class QuantizerAttributeConfig(ModeloptBaseConfig):
     """Quantizer attribute type."""
 
@@ -358,9 +359,10 @@ class SVDQuantConfig(QuantizeAlgorithmConfig):
 
 # QuantizeAlgoCfgType = _QuantizeAlgoCfgType | list[_QuantizeAlgoCfgType] | None
 
-#TODO Jingyu Xin
+
+# TODO Jingyu Xin
 class PEFTConfig(ModeloptBaseConfig):
-    """Default configuration for ``quantize`` mode."""
+    """Default configuration for ``peft`` mode."""
 
     adapter_name: str = ModeloptField(
         default="default",
@@ -380,8 +382,11 @@ class PEFTConfig(ModeloptBaseConfig):
         validate_default=True,
     )
 
+
 class ExportPEFTConfig(ModeloptBaseConfig):
     """An empty config."""
+
+
 class CompressConfig(ModeloptBaseConfig):
     """Default configuration for ``compress`` mode."""
 
modelopt/torch/peft/conversion.py

Lines changed: 1 addition & 0 deletions

@@ -42,6 +42,7 @@ def convert_to_peft_model(model: ModelLikeModule, config: PEFTConfig) -> Convert
     # set_quantizer_by_cfg(model, config.get("quant_cfg", {}))
 
     metadata = {}
+    # Should return adapters, active_adapters
     update_peft_metadata(model, config, metadata)
 
     return model, metadata

modelopt/torch/peft/convert.py

Lines changed: 35 additions & 30 deletions

@@ -20,47 +20,32 @@
 
 import torch.nn as nn
 
-# import modelopt.torch.quantization as mtq
 from modelopt.torch.opt import apply_mode
-
-# from modelopt.torch.quantization.conversion import set_quantizer_by_cfg
 from modelopt.torch.opt.conversion import ModeloptStateManager
-
-# from modelopt.torch.opt.searcher import ForwardLoop
-# from modelopt.torch.opt.utils import forward_with_reshard
 from modelopt.torch.peft.config import PEFTConfig
 
 from .lora.layer import LoRAModule
-
-# from . import config
-# from .algorithms import AutoQuantizeSearcher
-# from .config import QuantizeAlgoCfgType
-# from .conversion import set_quantizer_attribute
 from .mode import PEFTModeRegistry
 
-# from .nn import QuantModule, TensorQuantizer
-
-# __all__ = [
-#     "auto_quantize",
-#     "calibrate",
-#     "disable_quantizer",
-#     "enable_quantizer",
-#     "fold_weight",
-#     "postprocess_amax",
-#     "print_quant_summary",
-#     "quantize",
-# ]
-
 
 def update_model(
     model: nn.Module,
     config: dict[str, Any | PEFTConfig],
 ):
-    # TODO: deal with extra state, how to save the model
-    # TODO: sharded dict
-    # TODO: metadate
-    # TODO: how to restore the model
-    apply_mode(model, mode=[("peft", config)], registry=PEFTModeRegistry)
+    """Update model with PEFT/LoRA adapters.
+    This function handles both initial PEFT conversion and adding additional adapters:
+    - First call: Converts modules to LoRAModules and adds the first adapter
+    - Subsequent calls: Adds new adapters to existing LoRAModules
+    Args:
+        model: The model to update
+        config: PEFT configuration containing adapter settings
+    Returns:
+        The updated model with LoRA adapters
+    """
+    # Check if model is already in PEFT mode by looking for LoRA modules
+    if not is_peft_model(model):
+        # First time - need to convert to PEFT mode
+        apply_mode(model, mode=[("peft", config)], registry=PEFTModeRegistry)
     return add_adapter(model, config)
 
 
@@ -79,7 +64,9 @@ def add_adapter(model, config):
                        continue
                else:
                    raise NotImplementedError(f"Unsupported type {type(wildcard_or_filter_func)}")
-                module.update_layer_lora(adapter_name, adapter_setting["rank"])
+                module.update_layer_lora(
+                    adapter_name, adapter_setting["rank"], adapter_setting.get("scale", 1.0)
+                )
 
     # Update the metadata in ModeloptStateManager after adding adapters
     _update_peft_metadata_in_state(model)
@@ -111,3 +98,21 @@ def _update_peft_metadata_in_state(model: nn.Module) -> None:
     # Update the metadata in the last mode state (which should be 'peft')
     if manager._state and manager._last_metadata is not None:
         manager._last_metadata["peft_state"] = current_peft_state
+
+
+def is_peft_model(model: nn.Module) -> bool:
+    """Check if the model has been converted to PEFT/LoRA model.
+
+    This function checks if any modules in the model are LoRAModule instances,
+    which indicates the model has already been converted to PEFT mode.
+
+    Args:
+        model: The model to check
+
+    Returns:
+        True if the model contains LoRA modules, False otherwise
+    """
+    for _, module in model.named_modules():
+        if isinstance(module, LoRAModule):
+            return True
+    return False
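Taken together, the rewritten update_model and the new is_peft_model helper make conversion idempotent: the ``peft`` mode is applied only when no LoRAModule is found, and every call ends in add_adapter. A minimal usage sketch, assuming ModelOpt is installed and the model contains layers registered in LoRAModuleRegistry; the config layout follows the hypothetical example shown earlier.

# Sketch only: assumes `model` is an nn.Module containing layers that
# LoRAModuleRegistry knows how to wrap (e.g. Megatron ColumnParallelLinear),
# and that the dict keys below match the assumed adapter_cfg layout.
from modelopt.torch.peft.convert import is_peft_model, update_model

model = update_model(model, {"adapter_name": "default", "adapter_cfg": {"*linear*": {"rank": 64}}})
assert is_peft_model(model)  # eligible modules are now LoRAModules

# A second call skips apply_mode() and only registers another adapter.
model = update_model(
    model, {"adapter_name": "small_lora", "adapter_cfg": {"*linear*": {"rank": 16, "scale": 0.25}}}
)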

modelopt/torch/peft/lora/layer.py

Lines changed: 38 additions & 6 deletions

@@ -73,7 +73,7 @@ def deactivate_all_adapters(self) -> None:
         self._active_adapters.clear()
 
     def _register_adapter(
-        self, adapter_name: str, lora_a: nn.Module, lora_b: nn.Module, rank: int
+        self, adapter_name: str, lora_a: nn.Module, lora_b: nn.Module, rank: int, scale: float = 1.0
     ) -> None:
         """Register a new LoRA adapter with explicit rank tracking.
 
@@ -82,6 +82,7 @@ def _register_adapter(
             lora_a: LoRA A module (down-projection)
            lora_b: LoRA B module (up-projection)
            rank: Rank of the LoRA decomposition
+            scale: Scale factor for the LoRA output
         """
         # Add as submodules for proper parameter registration
         self.add_module(f"lora_a_{adapter_name}", lora_a)
@@ -92,13 +93,14 @@ def _register_adapter(
             "lora_a": lora_a,
             "lora_b": lora_b,
             "rank": rank,  # Store rank explicitly for reliability
+            "scale": scale,
         }
 
         # Automatically activate new adapters
         self.activate_adapter(adapter_name)
 
     @abstractmethod
-    def update_layer_lora(self, adapter_name: str, rank: int = 64) -> None:
+    def update_layer_lora(self, adapter_name: str, rank: int = 64, scale: float = 1.0) -> None:
         """Create and register a new LoRA adapter.
 
         This method must be implemented by subclasses to create the appropriate
@@ -107,6 +109,7 @@ def update_layer_lora(self, adapter_name: str, rank: int = 64) -> None:
         Args:
             adapter_name: Name for the new adapter
             rank: Rank of the LoRA decomposition (default: 64)
+            scale: Scale factor for the LoRA output (default: 1.0)
         """
         raise NotImplementedError("Subclasses must implement update_layer_lora")
 
@@ -148,14 +151,12 @@ def get_peft_state(self) -> dict[str, Any]:
                 "is_active": adapter_name in self._active_adapters,
                 "lora_a_type": type(lora_a).__name__,
                 "lora_b_type": type(lora_b).__name__,
+                "scale": adapter_modules.get("scale", 1.0),
             }
 
         modelopt_state["adapters"] = adapters_config
         modelopt_state["active_adapters"] = list(self._active_adapters)
 
-        # Store the base module type for validation
-        modelopt_state["base_module_type"] = type(self).__name__
-
         return modelopt_state
 
     def get_extra_state(self) -> dict[str, Any]:
@@ -177,6 +178,36 @@ def get_extra_state(self) -> dict[str, Any]:
 
         return {"modelopt_peft_state": peft_state}
 
+    def set_from_peft_state(self, peft_state: dict[str, Any]) -> None:
+        """Restore LoRA adapters from saved PEFT state.
+
+        This method recreates LoRA adapters based on their saved configuration.
+        Note: This only restores the adapter structure, not the weights.
+
+        Args:
+            peft_state: Dictionary containing adapter configurations
+        """
+        adapters_config = peft_state.get("adapters", {})
+
+        # Clear existing adapters first
+        self._lora_adapters.clear()
+        self._active_adapters.clear()
+
+        # Recreate each adapter based on saved configuration
+        for adapter_name, config in adapters_config.items():
+            rank = config.get("rank")
+            scale = config.get("scale", 1.0)
+
+            if rank is not None:
+                # Create the adapter with saved configuration
+                self.update_layer_lora(adapter_name, rank=rank, scale=scale)
+
+                # Set activation state
+                if config.get("is_active", False):
+                    self.activate_adapter(adapter_name)
+                else:
+                    self.deactivate_adapter(adapter_name)
+
     def set_extra_state(self, state: dict[str, Any]) -> None:
         """Restore extra state for distributed checkpointing.
 
@@ -245,7 +276,8 @@ def forward(self, x: torch.Tensor, *args, **kwargs) -> Any:
                 if isinstance(lora_b_output, tuple):
                     lora_b_output = lora_b_output[0]
 
-                result = result + lora_b_output
+                scale = adapter.get("scale", 1.0)
+                result = result + scale * lora_b_output
 
         # Return output in the same format as the base layer
         if other_outputs:
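The forward change is the core of this file: the LoRA branch is now multiplied by the stored per-adapter scale before being added to the base output, i.e. y = base(x) + scale * lora_b(lora_a(x)). The snippet below is an illustrative, self-contained rendering of that arithmetic with plain nn.Linear layers; it is not the LoRAModule implementation, which wraps arbitrary registered base layers and tracks multiple named adapters.

import torch
import torch.nn as nn

class TinyScaledLoRA(nn.Module):
    """Illustration of y = base(x) + scale * B(A(x)); not the ModelOpt class."""

    def __init__(self, features: int, rank: int = 4, scale: float = 0.5):
        super().__init__()
        self.base = nn.Linear(features, features)
        self.lora_a = nn.Linear(features, rank, bias=False)   # down-projection
        self.lora_b = nn.Linear(rank, features, bias=False)   # up-projection
        self.scale = scale
        nn.init.zeros_(self.lora_b.weight)  # adapter starts as a no-op

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Mirrors the diff: result = result + scale * lora_b_output
        return self.base(x) + self.scale * self.lora_b(self.lora_a(x))

layer = TinyScaledLoRA(16)
out = layer(torch.randn(2, 16))  # equals the base output until lora_b is trained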

modelopt/torch/peft/lora/tp_layer.py

Lines changed: 43 additions & 10 deletions

@@ -9,8 +9,20 @@
 
 from .layer import LoRAModule, LoRAModuleRegistry
 
-# Default rank for LoRA decomposition
+try:
+    from modelopt.torch.quantization.plugins.megatron import (
+        _MegatronColumnParallelLinear as QuantColumnParallelLinear,
+    )
+    from modelopt.torch.quantization.plugins.megatron import (
+        _MegatronRowParallelLinear as QuantRowParallelLinear,
+    )
+
+    QUANT_MODULES_AVAILABLE = True
+except ImportError:
+    QUANT_MODULES_AVAILABLE = False
+
 DEFAULT_LORA_RANK = 64
+DEFAULT_SCALE = 1.0
 
 
 class _MegatronParallelLoRABase(LoRAModule):
@@ -33,7 +45,7 @@ def _get_init_methods(self) -> tuple[Callable, Callable]:
         return lora_a_init, lora_b_init
 
     def _register_adapter_with_device(
-        self, adapter_name: str, lora_a: nn.Module, lora_b: nn.Module, rank: int
+        self, adapter_name: str, lora_a: nn.Module, lora_b: nn.Module, rank: int, scale: float
     ) -> None:
         """Register LoRA adapter modules and ensure correct device placement.
 
@@ -43,23 +55,29 @@ def _register_adapter_with_device(
             lora_b: LoRA B module (up-projection)
            rank: Rank of the LoRA decomposition
        """
-        # Move LoRA modules to the same device as the parent module
-        # Try to get device from parent module's parameters or buffers
+        # Move LoRA modules to the same device and dtype as the parent module
+        # Try to get device and dtype from parent module's parameters or buffers
         device = None
+        dtype = None
         for p in self.parameters():
             device = p.device
+            dtype = p.dtype
             break
         if device is None:
             for b in self.buffers():
                 device = b.device
+                dtype = b.dtype
                 break
 
-        # If we found a device, move LoRA modules to it
+        # If we found a device and dtype, move LoRA modules to match
         if device is not None:
             lora_a = lora_a.to(device)
             lora_b = lora_b.to(device)
+            if dtype is not None:
+                lora_a = lora_a.to(dtype)
+                lora_b = lora_b.to(dtype)
 
-        super()._register_adapter(adapter_name, lora_a, lora_b, rank)
+        super()._register_adapter(adapter_name, lora_a, lora_b, rank, scale)
 
 
 @LoRAModuleRegistry.register({ColumnParallelLinear: "megatron_ColumnParallelLinear"})
@@ -70,7 +88,9 @@ class _MegatronColumnParallelLinear(_MegatronParallelLoRABase):
     the parallelization scheme of the base layer.
     """
 
-    def update_layer_lora(self, adapter_name: str, rank: int = DEFAULT_LORA_RANK) -> None:
+    def update_layer_lora(
+        self, adapter_name: str, rank: int = DEFAULT_LORA_RANK, scale: float = DEFAULT_SCALE
+    ) -> None:
         """Create and register a new LoRA adapter for ColumnParallelLinear.
 
         Args:
@@ -100,7 +120,7 @@ def update_layer_lora(self, adapter_name: str, rank: int = DEFAULT_LORA_RANK) ->
             init_method=lora_b_init,
         )
 
-        self._register_adapter_with_device(adapter_name, lora_a, lora_b, rank)
+        self._register_adapter_with_device(adapter_name, lora_a, lora_b, rank, scale)
 
 
 @LoRAModuleRegistry.register({RowParallelLinear: "megatron_RowParallelLinear"})
@@ -111,7 +131,9 @@ class _MegatronRowParallelLinear(_MegatronParallelLoRABase):
     the parallelization scheme of the base layer.
     """
 
-    def update_layer_lora(self, adapter_name: str, rank: int = DEFAULT_LORA_RANK) -> None:
+    def update_layer_lora(
+        self, adapter_name: str, rank: int = DEFAULT_LORA_RANK, scale: float = DEFAULT_SCALE
+    ) -> None:
         """Create and register a new LoRA adapter for RowParallelLinear.
 
         Args:
@@ -141,4 +163,15 @@ def update_layer_lora(self, adapter_name: str, rank: int = DEFAULT_LORA_RANK) ->
             init_method=lora_b_init,
         )
 
-        self._register_adapter_with_device(adapter_name, lora_a, lora_b, rank)
+        self._register_adapter_with_device(adapter_name, lora_a, lora_b, rank, scale)
+
+
+# Register quantized versions if available
+if QUANT_MODULES_AVAILABLE:
+    # Register the same LoRA implementations for quantized modules
+    LoRAModuleRegistry.register({QuantColumnParallelLinear: "quant_megatron_ColumnParallelLinear"})(
+        _MegatronColumnParallelLinear
+    )
+    LoRAModuleRegistry.register({QuantRowParallelLinear: "quant_megatron_RowParallelLinear"})(
+        _MegatronRowParallelLinear
+    )
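With scale threaded through _register_adapter_with_device, each Megatron parallel LoRA adapter is created with both a rank and an output multiplier, and the trailing block reuses the same classes for the quantized ColumnParallelLinear/RowParallelLinear variants whenever the quantization plugins import cleanly. A hedged sketch of adding adapters directly on such a module; it assumes Megatron-Core is installed and that `module` is already one of the LoRA classes above (for example after the ``peft`` conversion).

# Sketch only: `module` is assumed to be a _MegatronColumnParallelLinear or
# _MegatronRowParallelLinear instance produced by the "peft" mode conversion.
module.update_layer_lora("default")                          # rank=DEFAULT_LORA_RANK, scale=DEFAULT_SCALE
module.update_layer_lora("small_lora", rank=8, scale=0.125)  # low-rank adapter with a damped output

# The per-adapter scale is now part of the PEFT state (see layer.py above),
# so it survives get_extra_state()/set_extra_state() checkpointing round-trips.
state = module.get_peft_state()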
