Commit e050fd8

[PEFT]: nola (#10747)

* 'nola commit'
* Update test_nola.py
* solve conflicts
1 parent e298ba4 commit e050fd8

File tree

8 files changed: +412 additions, -4 deletions


llm/run_finetune.py

Lines changed: 2 additions & 0 deletions

@@ -580,7 +580,9 @@ def create_peft_model(model_args, reft_args, training_args, dtype, model_config,
            use_quick_lora=model_args.use_quick_lora,
            lora_use_mixer=model_args.lora_use_mixer,
            use_mora=model_args.use_mora,
+            nola=model_args.nola,
+            nola_basis_num=model_args.nola_basis_num,
            mixer_num=model_args.mixer_num,
            lorapro=model_args.lorapro,
        )
        if model_args.lorapro:

paddlenlp/peft/lora/lora_config.py

Lines changed: 2 additions & 0 deletions

@@ -77,6 +77,8 @@ class LoRAConfig:
    rslora: bool = field(default=False, metadata={"help": "Whether to use RsLoRA"})
    pissa: bool = field(default=False, metadata={"help": "Whether to use Pissa: https://arxiv.org/pdf/2404.02948.pdf"})
    loraga: bool = field(default=False, metadata={"help": "Whether to use LoRA-GA"})
+    nola: bool = field(default=False, metadata={"help": "Whether to use NoLA: https://arxiv.org/pdf/2310.02556"})
+    nola_basis_num: int = field(default=1, metadata={"help": "Number of basis matrices per side when using NoLA"})
    use_mora: bool = field(
        default=False, metadata={"help": "Whether to use MoRA: https://arxiv.org/pdf/2405.12130.pdf"}
    )
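For context, a minimal sketch of enabling the new fields through the existing LoRAConfig/LoRAModel API; the tiny test model and the target_modules patterns below are placeholders for illustration, not taken from this commit:

    # Hedged sketch: turning on NOLA via the new LoRAConfig fields.
    from paddlenlp.peft import LoRAConfig, LoRAModel
    from paddlenlp.transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained("__internal_testing__/tiny-random-llama")
    lora_config = LoRAConfig(
        target_modules=[".*q_proj.*", ".*v_proj.*"],  # assumed module-name patterns
        r=8,
        lora_alpha=16,
        nola=True,          # new flag added by this commit
        nola_basis_num=3,   # number of frozen basis matrices per side
    )
    model = LoRAModel(model, lora_config)
    model.mark_only_lora_as_trainable()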

paddlenlp/peft/lora/lora_layers.py

Lines changed: 56 additions & 4 deletions

@@ -63,6 +63,8 @@ def __init__(
        rslora: bool = False,
        lora_plus_scale: float = 1.0,
        pissa: bool = False,
+        nola: bool = False,
+        nola_basis_num: int = 1,
        lora_use_mixer: bool = False,
        mixer_num: int = 1,
        use_mora: bool = False,
@@ -85,6 +87,8 @@ def __init__(
        # Mark the weight as unmerged
        self.merged = False
        self.pissa = pissa
+        self.nola = nola
+        self.nola_basis_num = nola_basis_num
        self.lora_use_mixer = lora_use_mixer
        self.mixer_num = mixer_num
        self.lorapro = lorapro
@@ -144,6 +148,32 @@ def __init__(
                ),
            )
            self.apply_pissa = False
+        if nola:
+            # NOLA: frozen random basis matrices plus trainable scalar coefficients
+            self.nola_basis_A = self.create_parameter(
+                shape=[nola_basis_num, in_features, r],
+                dtype=self._dtype,
+                is_bias=False,
+            )
+            self.nola_basis_A.stop_gradient = True
+            self.nola_basis_B = self.create_parameter(
+                shape=[nola_basis_num, r, out_features],
+                dtype=self._dtype,
+                is_bias=False,
+            )
+            self.nola_basis_B.stop_gradient = True
+            self.nola_alpha = self.create_parameter(
+                shape=[nola_basis_num],
+                dtype=self._dtype,
+                is_bias=False,
+                default_initializer=nn.initializer.Constant(value=0.0),
+            )
+            self.nola_beta = self.create_parameter(
+                shape=[nola_basis_num],
+                dtype=self._dtype,
+                is_bias=False,
+                default_initializer=nn.initializer.Constant(value=0.0),
+            )
        if use_mora or pissa:
            self.scaling = 1.0
        elif not rslora:
@@ -179,6 +209,16 @@ def pissa_init(self, rank):
        weight = res.astype(dtype)
        self.weight.set_value(weight)

+    def get_nola_lora_matrices(self):
+        """Compute LoRA matrices A and B from the NOLA basis and coefficients."""
+        if not self.nola:
+            return self.lora_A, self.lora_B
+        # A = sum_k alpha_k * A_k -> [in_features, r]
+        lora_A = paddle.einsum("k,kir->ir", self.nola_alpha, self.nola_basis_A)
+        # B = sum_k beta_k * B_k -> [r, out_features]
+        lora_B = paddle.einsum("k,kro->ro", self.nola_beta, self.nola_basis_B)
+        return lora_A, lora_B
+
    def rope_init(self):
        if self.cos is None or self.sin is None:
            inv_freq = 1.0 / (10000 ** (paddle.arange(0, self.r, 2, dtype=paddle.float32) / self.r))
@@ -257,6 +297,9 @@ def get_delta_weight(self, lora_A=None, lora_B=None, lora_AB=None):
            w = w[: self.out_features]
            final_weight = w
            delta_weight = final_weight.T
+        elif self.nola:
+            lora_A, lora_B = self.get_nola_lora_matrices()
+            delta_weight = lora_A @ lora_B * self.scaling
        else:
            lora_A = lora_A if lora_A is not None else self.lora_A
            lora_B = lora_B if lora_B is not None else self.lora_B
@@ -299,6 +342,11 @@ def forward(self, input: paddle.Tensor, *args, **kwargs):
            input = self.lora_dropout(input)
            mora_out = self._apply_mora(input)
            result += mora_out
+        elif self.nola:
+            result = F.linear(x=input, weight=self.weight, bias=self.bias, name=self.name)
+            input = self.lora_dropout(input)
+            lora_A, lora_B = self.get_nola_lora_matrices()
+            result += (input @ lora_A @ lora_B) * self.scaling
        else:
            result = F.linear(x=input, weight=self.weight, bias=self.bias, name=self.name)
            if self.lora_use_mixer:
@@ -327,14 +375,16 @@ def __init__(
        use_quick_lora: bool = False,
        pissa: bool = False,
        use_mora: bool = False,
+        nola: bool = False,
+        nola_basis_num: int = 1,
        **kwargs
    ):
        RowParallelLinear.__init__(self, in_features, out_features, **kwargs)
        if not isinstance(r, int) or r <= 0:
            raise ValueError("Lora rank r should be a positive integer")

-        if pissa or use_mora:
-            raise ValueError("Pissa or Mora is not supported in model parallel by now")
+        if pissa or use_mora or nola:
+            raise ValueError("PiSSA, MoRA, and NoLA are not yet supported in model parallel")

        self.r = r
        self.lora_alpha = lora_alpha
@@ -593,14 +643,16 @@ def __init__(
        use_quick_lora: bool = False,
        pissa: bool = False,
        use_mora: bool = False,
+        nola: bool = False,
+        nola_basis_num: int = 1,
        **kwargs
    ):
        ColumnParallelLinear.__init__(self, in_features, out_features, **kwargs)
        if not isinstance(r, int) or r <= 0:
            raise ValueError("Lora rank r should be a positive integer")

-        if pissa or use_mora:
-            raise ValueError("Pissa or Mora is not supported in model parallel by now")
+        if pissa or use_mora or nola:
+            raise ValueError("PiSSA, MoRA, and NoLA are not yet supported in model parallel")

        self.r = r
        self.lora_alpha = lora_alpha
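To make the reparameterization above concrete: NOLA builds each adapter matrix as a linear combination of frozen random bases, so only the k alpha and k beta scalars are trained per layer. A small self-contained NumPy sketch (shapes follow the layer; the scaling of lora_alpha / r is an assumption mirroring the default LoRA scaling):

    # Standalone NumPy sketch of the NOLA delta-weight computation above.
    import numpy as np

    in_features, out_features, r, k = 64, 32, 8, 3
    rng = np.random.default_rng(0)

    basis_A = rng.standard_normal((k, in_features, r))   # frozen random basis
    basis_B = rng.standard_normal((k, r, out_features))  # frozen random basis
    alpha = np.zeros(k)  # trainable coefficients (zero-initialized, as in the layer)
    beta = np.zeros(k)   # trainable coefficients
    scaling = 16 / r     # assumed lora_alpha / r

    lora_A = np.einsum("k,kir->ir", alpha, basis_A)  # [in_features, r]
    lora_B = np.einsum("k,kro->ro", beta, basis_B)   # [r, out_features]
    delta_weight = (lora_A @ lora_B) * scaling       # [in_features, out_features]
    # With zero-initialized coefficients the delta starts at exactly zero,
    # so the adapted layer initially matches the base weight.

Because the bases are frozen and recoverable from a random seed, only the coefficient vectors need to be stored per layer, which is the compression NOLA targets.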

paddlenlp/peft/lora/lora_model.py

Lines changed: 6 additions & 0 deletions

@@ -482,6 +482,8 @@ def _find_and_replace_module(self, model, module_name, lora_config):
                rslora=lora_config.rslora,
                lora_plus_scale=lora_config.lora_plus_scale,
                pissa=lora_config.pissa,
+                nola=lora_config.nola,
+                nola_basis_num=lora_config.nola_basis_num,
                bias_attr=False if module.bias is None else None,
                use_quick_lora=lora_config.use_quick_lora,
                lora_use_mixer=lora_config.lora_use_mixer,
@@ -521,6 +523,8 @@ def _find_and_replace_module(self, model, module_name, lora_config):
                rslora=lora_config.rslora,
                lora_plus_scale=lora_config.lora_plus_scale,
                pissa=lora_config.pissa,
+                nola=lora_config.nola,
+                nola_basis_num=lora_config.nola_basis_num,
                lora_A_weight_attr=paddle.ParamAttr(
                    initializer=nn.initializer.KaimingUniform(negative_slope=math.sqrt(5), nonlinearity="leaky_relu")
                ),
@@ -547,6 +551,8 @@ def _find_and_replace_module(self, model, module_name, lora_config):
                rslora=lora_config.rslora,
                lora_plus_scale=lora_config.lora_plus_scale,
                pissa=lora_config.pissa,
+                nola=lora_config.nola,
+                nola_basis_num=lora_config.nola_basis_num,
                use_quick_lora=lora_config.use_quick_lora,
            )
        # LoRA column parallel will split the lora_A matrix

paddlenlp/trl/model_config.py

Lines changed: 2 additions & 0 deletions

@@ -64,6 +64,8 @@ class ModelConfig:
    lora_use_mixer: bool = field(
        default=False, metadata={"help": "Whether to use MosLoRA: https://arxiv.org/pdf/2406.11909"}
    )
+    nola: bool = field(default=False, metadata={"help": "Whether to use NoLA: https://arxiv.org/pdf/2310.02556"})
+    nola_basis_num: int = field(default=1, metadata={"help": "Number of basis matrices per side when using NoLA"})
    mixer_num: int = field(default=1, metadata={"help": "Num of mixer matrices."})
    use_mora: bool = field(
        default=False, metadata={"help": "Whether to use MoRA: https://arxiv.org/pdf/2405.12130.pdf"}
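These ModelConfig fields surface as command-line flags for llm/run_finetune.py through the argument parser. A hedged sketch of an invocation — the flag names follow the dataclass fields, but the paths, model, and other arguments are placeholders:

    # Hedged sketch: invoking the finetune entry point with the new flags.
    import subprocess

    subprocess.run(
        [
            "python", "llm/run_finetune.py",
            "--model_name_or_path", "__internal_testing__/tiny-random-llama",
            "--dataset_name_or_path", "./data",
            "--output_dir", "./checkpoints/nola",
            "--lora", "true",
            "--nola", "true",
            "--nola_basis_num", "3",
        ],
        check=True,
    )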

tests/fixtures/llm/nola.yaml

Lines changed: 65 additions & 0 deletions

@@ -0,0 +1,65 @@
nola:
  base:
    dataset_name_or_path: "./data"
    per_device_train_batch_size: 4
    gradient_accumulation_steps: 4
    per_device_eval_batch_size: 8
    eval_accumulation_steps: 16
    num_train_epochs: 3
    learning_rate: 3e-04
    warmup_steps: 30
    logging_steps: 1
    evaluation_strategy: "epoch"
    save_strategy: "epoch"
    src_length: 1024
    max_length: 2048
    fp16: true
    fp16_opt_level: "O2"
    do_train: true
    do_eval: true
    disable_tqdm: true
    load_best_model_at_end: true
    eval_with_do_generation: false
    metric_for_best_model: "accuracy"
    recompute: true
    save_total_limit: 1
    tensor_parallel_degree: 1
    pipeline_parallel_degree: 1
    lora: true
    nola: true
    nola_basis_num: 3

  default:
    llama:
      model_name_or_path: __internal_testing__/tiny-random-llama
    chatglm:
      model_name_or_path: __internal_testing__/tiny-fused-chatglm
    chatglm2:
      model_name_or_path: __internal_testing__/tiny-fused-chatglm2
    bloom:
      model_name_or_path: __internal_testing__/tiny-fused-bloom
    qwen:
      model_name_or_path: __internal_testing__/tiny-fused-qwen
    baichuan:
      model_name_or_path: __internal_testing__/tiny-fused-baichuan

inference-predict:
  default:
    mode: dynamic
    max_length: 20
    batch_size: 2
    decode_strategy: greedy_search
    dtype: float16

inference-to-static:
  default:
    dtype: float16
    max_length: 20

inference-infer:
  default:
    mode: static
    dtype: float16
    batch_size: 2
    decode_strategy: greedy_search
    max_length: 20
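The test below reads this fixture through load_test_config, selecting the shared "base" block plus the per-model entry under "default". A minimal sketch mirroring that lookup as performed in test_nola.py:

    # Minimal sketch of how the fixture above is consumed by the test.
    from tests.testing_utils import load_test_config

    config = load_test_config("./tests/fixtures/llm/nola.yaml", "nola", "llama")
    print(config["nola"], config["nola_basis_num"])  # expected: True 3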

tests/llm/test_nola.py

Lines changed: 80 additions & 0 deletions

@@ -0,0 +1,80 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import os
import sys
import unittest

import paddle
from parameterized import parameterized_class

from tests.testing_utils import argv_context_guard, load_test_config

from .testing_utils import LLMTest


@parameterized_class(
    ["model_dir"],
    [
        ["llama"],
        # ["chatglm"],  # Skip and wait to fix.
        # ["chatglm2"],  # Skip and wait to fix.
        # ["bloom"],  # Skip and wait to fix.
        ["qwen"],
        ["baichuan"],
    ],
)
class NolaTest(LLMTest, unittest.TestCase):
    config_path: str = "./tests/fixtures/llm/nola.yaml"
    model_dir: str = None

    def setUp(self) -> None:
        LLMTest.setUp(self)

        self.model_codes_dir = os.path.join(self.root_path, self.model_dir)
        sys.path.insert(0, self.model_codes_dir)

    def tearDown(self) -> None:
        LLMTest.tearDown(self)
        sys.path.remove(self.model_codes_dir)

    def test_nola(self):
        self.disable_static()
        paddle.set_default_dtype("float32")

        nola_config = load_test_config(self.config_path, "nola", self.model_dir)
        nola_config["output_dir"] = self.output_dir
        nola_config["dataset_name_or_path"] = self.data_dir

        with argv_context_guard(nola_config):
            from run_finetune import main

            main()

        # merge weights
        merge_lora_weights_config = {
            "lora_path": nola_config["output_dir"],
            "model_name_or_path": nola_config["model_name_or_path"],
            "output_path": nola_config["output_dir"],
        }
        with argv_context_guard(merge_lora_weights_config):
            from tools.merge_lora_params import merge

            merge()

        # TODO(wj-Mcat): disable chatglm2 test temporarily
        if self.model_dir not in ["qwen", "baichuan", "chatglm2"]:
            self.run_predictor({"inference_model": True})

        self.run_predictor({"inference_model": False})
