NVIDIA
diff --git a/‎modelopt/torch/peft/__init__.py‎
Lines changed: 23 additions & 0 deletions b/‎modelopt/torch/peft/__init__.py‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎modelopt/torch/peft/config.py‎
Lines changed: 434 additions & 0 deletions b/‎modelopt/torch/peft/config.py‎
Lines changed: 434 additions & 0 deletions
diff --git a/‎modelopt/torch/peft/conversion.py‎
Lines changed: 108 additions & 0 deletions b/‎modelopt/torch/peft/conversion.py‎
Lines changed: 108 additions & 0 deletions
diff --git a/‎modelopt/torch/peft/convert.py‎
Lines changed: 80 additions & 0 deletions b/‎modelopt/torch/peft/convert.py‎
Lines changed: 80 additions & 0 deletions
diff --git a/‎modelopt/torch/peft/lora/__init__.py‎
Lines changed: 3 additions & 0 deletions b/‎modelopt/torch/peft/lora/__init__.py‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎modelopt/torch/peft/lora/layer.py‎
Lines changed: 135 additions & 0 deletions b/‎modelopt/torch/peft/lora/layer.py‎
Lines changed: 135 additions & 0 deletions
@@ -0,0 +1,23 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""PEFT (parameter-efficient fine-tuning) API subpackage for torch."""
+
+from . import mode
+from .config import *
+from .convert import *
+# isort: off
+# Import plugins last to avoid circular imports
+# from . import plugins
@@ -0,0 +1,108 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""PEFT conversion/restore utilities."""
+
+import fnmatch
+from collections.abc import Callable
+from contextlib import contextmanager
+from typing import Any
+
+import torch.nn as nn
+
+from modelopt.torch.opt.conversion import ApplyModeError, ModelLikeModule, ModeloptStateManager
+from modelopt.torch.opt.dynamic import _DMRegistryCls
+from modelopt.torch.opt.mode import ConvertReturnType, MetadataDict
+from modelopt.torch.utils import get_unwrapped_name
+
+from .config import (
+    PEFTConfig,
+    _QuantizeExportConfig,
+)
+from .lora.layer import LoRAModuleRegistry
+
+__all__ = [
+    "replace_lora_module",
+]
+
+
+def convert_to_peft_model(model: ModelLikeModule, config: PEFTConfig) -> ConvertReturnType:
+    """Convert the model to a quantized one as per `config`."""
+    # initialize the true module if necessary
+    model = model.init_modellike() if isinstance(model, ModelLikeModule) else model
+
+    # TODO: Replace to LoRA module
+    replace_lora_module(model, version=ModeloptStateManager(model).state_version, config=config)
+    # set_quantizer_by_cfg(model, config.get("quant_cfg", {}))
+
+    metadata = {}
+    # update_quantize_metadata(model, config, metadata)
+
+    return model, metadata
+
+def restore_peft_model(
+    model: ModelLikeModule, config: PEFTConfig, metadata: MetadataDict
+) -> nn.Module:
+    #TODO: implemente the restore logic
+    pass
+
+
+
+def update_peft_metadata(
+    model: nn.Module, config: PEFTConfig, metadata: MetadataDict
+) -> None:
+    """Update the quantizer state in the metadata dict."""
+    pass
+
+
+def replace_lora_module(model: nn.Module, version=None, config: PEFTConfig = None, registry=LoRAModuleRegistry):
+    """Recursively replace the module with quantized module."""
+    #TODO: register the extra state for megatron-lm
+
+    if type(model) in registry:
+        model = registry.convert(model)
+    _replace_lora_module(model, version=version, registry=registry)
+
+def export_peft_model(model: nn.Module, config):
+    """Export the quantized model to a quantized model."""
+    raise NotImplementedError("Exporting a quantized model is not supported yet.")
+
+
+def restore_export_peft_model(
+    model: nn.Module, config, metadata: MetadataDict
+):
+    """Restores the quantized model from the given state dict."""
+    raise NotImplementedError("Restoring a quantized & exported model is not supported yet.")
+
+
+def _replace_lora_module(model: nn.Module, version=None,registry=LoRAModuleRegistry):
+    for name, child in model.named_children():
+        if type(child) in registry:
+            lora_module = registry.convert(child)
+            setattr(model, name, lora_module)
+
+        _replace_lora_module(getattr(model, name), version=version, registry=registry)
+
+
+def export_quantized_model(model: nn.Module, config: _QuantizeExportConfig) -> ConvertReturnType:
+    """Export the quantized model to a quantized model."""
+    raise NotImplementedError("Exporting a quantized model is not supported yet.")
+
+
+def restore_export_quantized_model(
+    model: nn.Module, config: _QuantizeExportConfig, metadata: MetadataDict
+) -> nn.Module:
+    """Restores the quantized model from the given state dict."""
+    raise NotImplementedError("Restoring a quantized & exported model is not supported yet.")
@@ -0,0 +1,80 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""User-facing PEFT API."""
+
+import fnmatch
+import inspect
+import warnings
+from collections.abc import Callable, Iterable
+from typing import Any
+
+import torch
+import torch.nn as nn
+
+# import modelopt.torch.quantization as mtq
+from modelopt.torch.opt import apply_mode
+# from modelopt.torch.opt.searcher import ForwardLoop
+# from modelopt.torch.opt.utils import forward_with_reshard
+from modelopt.torch.peft.config import PEFTConfig
+# from modelopt.torch.quantization.conversion import set_quantizer_by_cfg
+
+# from . import config
+# from .algorithms import AutoQuantizeSearcher
+# from .config import QuantizeAlgoCfgType
+# from .conversion import set_quantizer_attribute
+from .mode import PEFTModeRegistry
+from .lora.layer import LoRAModule
+# from .nn import QuantModule, TensorQuantizer
+
+# __all__ = [
+#     "auto_quantize",
+#     "calibrate",
+#     "disable_quantizer",
+#     "enable_quantizer",
+#     "fold_weight",
+#     "postprocess_amax",
+#     "print_quant_summary",
+#     "quantize",
+# ]
+
+def update_model(
+    model: nn.Module,
+    config: dict[str, Any | PEFTConfig],
+):
+    #TODO: deal with extra state, how to save the model
+    #TODO: sharded dict
+    #TODO: metadate
+    #TODO: how to restore the model
+    apply_mode(model, mode=[("peft", config)], registry=PEFTModeRegistry)
+    return add_adapter(model, config)
+
+def add_adapter(model, config):
+    adapter_cfg = config["adapter_cfg"]
+    adapter_name = config["adapter_name"]
+
+    for name, module in model.named_modules():
+        if isinstance(module, LoRAModule):
+            for wildcard_or_filter_func, adapter_setting in adapter_cfg.items():
+                if isinstance(wildcard_or_filter_func, str):
+                    if not fnmatch.fnmatch(name, wildcard_or_filter_func):
+                        continue
+                elif callable(wildcard_or_filter_func):
+                    if not wildcard_or_filter_func(name):
+                        continue
+                else:
+                    raise NotImplementedError(f"Unsupported type {type(wildcard_or_filter_func)}")
+                module.update_layer_lora(adapter_name, adapter_setting["rank"])
+    return model
@@ -0,0 +1,3 @@
+from . import layer
+from . import tp_layer
+# from . import linear_layer
@@ -0,0 +1,135 @@
+"""LoRA (Low-Rank Adaptation) module implementation."""
+
+from abc import abstractmethod
+from typing import Dict, Tuple, Any, Optional
+import torch
+import torch.nn as nn
+
+from modelopt.torch.opt.dynamic import DynamicModule, _DMRegistryCls
+
+__all__ = [
+    "LoRAModule",
+    "LoRAModuleRegistry",
+]
+
+
+class LoRAModule(DynamicModule):
+    """Base class for LoRA (Low-Rank Adaptation) modules.
+    
+    This module wraps existing layers and adds trainable low-rank decomposition
+    matrices (LoRA adapters) that are added to the original layer's output.
+    
+    Attributes:
+        _lora_adapters: Dictionary mapping adapter names to their LoRA A and B matrices
+        _active_adapters: Set of currently active adapter names
+    """
+    
+    def _setup(self) -> None:
+        """Initialize LoRA-specific attributes."""
+        self._lora_adapters: Dict[str, Dict[str, nn.Module]] = {}
+        self._active_adapters: set = set()
+    
+    @property
+    def adapter_names(self) -> set:
+        """Return the set of all registered adapter names."""
+        return set(self._lora_adapters.keys())
+    
+    @property
+    def active_adapters(self) -> set:
+        """Return the set of currently active adapter names."""
+        return self._active_adapters.copy()
+    
+    def activate_adapter(self, adapter_name: str) -> None:
+        """Activate a specific adapter.
+        
+        Args:
+            adapter_name: Name of the adapter to activate
+            
+        Raises:
+            ValueError: If adapter_name is not registered
+        """
+        if adapter_name not in self._lora_adapters:
+            raise ValueError(f"Adapter '{adapter_name}' not found. Available: {list(self._lora_adapters.keys())}")
+        self._active_adapters.add(adapter_name)
+    
+    def deactivate_adapter(self, adapter_name: str) -> None:
+        """Deactivate a specific adapter.
+        
+        Args:
+            adapter_name: Name of the adapter to deactivate
+        """
+        self._active_adapters.discard(adapter_name)
+    
+    def activate_all_adapters(self) -> None:
+        """Activate all registered adapters."""
+        self._active_adapters = self.adapter_names.copy()
+    
+    def deactivate_all_adapters(self) -> None:
+        """Deactivate all adapters."""
+        self._active_adapters.clear()
+    
+    @abstractmethod
+    def update_layer_lora(self, adapter_name: str, rank: int = 64) -> None:
+        """Create and register a new LoRA adapter.
+        
+        This method must be implemented by subclasses to create the appropriate
+        LoRA A and B matrices for the specific layer type.
+        
+        Args:
+            adapter_name: Name for the new adapter
+            rank: Rank of the LoRA decomposition (default: 64)
+        """
+        raise NotImplementedError("Subclasses must implement update_layer_lora")
+    
+    def forward(self, x: torch.Tensor, *args, **kwargs) -> Any:
+        """Forward pass with LoRA adaptation.
+        
+        Args:
+            x: Input tensor
+            *args: Additional positional arguments for the base layer
+            **kwargs: Additional keyword arguments for the base layer
+            
+        Returns:
+            Output from the base layer plus active LoRA adaptations
+        """
+        # Call the base layer's forward method
+        output = super().forward(x, *args, **kwargs)
+        
+        # Handle different output types from base layer
+        if isinstance(output, tuple):
+            # If output is a tuple, assume first element is the main result
+            result = output[0]
+            other_outputs = output[1:]
+        else:
+            # If output is a single tensor
+            result = output
+            other_outputs = ()
+        
+        # Apply active LoRA adapters
+        if self._active_adapters and self._lora_adapters:
+            for adapter_name in self._active_adapters:
+                if adapter_name in self._lora_adapters:
+                    adapter = self._lora_adapters[adapter_name]
+                    # LoRA computation: result = result + B(A(x))
+                    lora_a = adapter['lora_a']
+                    lora_b = adapter['lora_b']
+                    
+                    # Handle different forward signatures
+                    lora_a_output = lora_a(x)
+                    if isinstance(lora_a_output, tuple):
+                        lora_a_output = lora_a_output[0]
+                    
+                    lora_b_output = lora_b(lora_a_output)
+                    if isinstance(lora_b_output, tuple):
+                        lora_b_output = lora_b_output[0]
+                    
+                    result = result + lora_b_output
+        
+        # Return output in the same format as the base layer
+        if other_outputs:
+            return (result,) + other_outputs
+        else:
+            return result
+
+
+# Registry for LoRA modules. The conversion code checks `type(module) in registry`
+# and calls `registry.convert(module)` to obtain the LoRA version of a module.
+LoRAModuleRegistry = _DMRegistryCls("LoRA", LoRAModule)
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+from . import layer`
	`2`	`+from . import tp_layer`
	`3`	`+# from . import linear_layer`