
Commit a11b5ec

Merge pull request #1321 from lzjpaul/25-8-20-dev
2 parents e6f78c8 + 83670bd commit a11b5ec

File tree

  • examples/singa_peft/src/singa_peft/tuners/linear_lora

1 file changed: 89 additions, 0 deletions
@@ -0,0 +1,89 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#

import math

from singa import tensor
from singa import autograd
from singa import layer


class LinearLoRALayer(layer.Layer):
    """
    LinearLoRALayer: LoRA (low-rank adaptation) applied to a linear layer.
    """

    def __init__(
        self,
        base_layer: layer.Linear,
        r: int = 8,
        lora_alpha: int = 1,
        lora_dropout: float = 0.,
    ):
        r"""
        Args:
            base_layer: the linear layer to adapt; the LoRA layer takes its
                input and output feature sizes from this base layer.
            r: the LoRA rank, which determines the size of the low-rank
                matrices; must be a positive integer, default 8.
            lora_alpha: scaling factor for the LoRA update (the effective
                scaling is lora_alpha / r), default 1.
            lora_dropout: dropout ratio, default 0.
        """
        super().__init__()
        if r <= 0:
            raise ValueError(f"`r` should be a positive integer value but the value passed is {r}")
        self.r = r
        self.base_layer = base_layer
        self.in_features = base_layer.in_features
        self.out_features = base_layer.out_features
        self.lora_alpha = lora_alpha
        self.lora_dropout = lora_dropout
        self.merged = False

    def initialize(self, x):
        # freeze weights of base layer
        if self.base_layer._initialized is False:
            self.base_layer.initialize(x)
        self.freeze_pretrained_weight(True)
        # actual trainable parameters
        lora_A_shape = (self.r, self.in_features)
        lora_B_shape = (self.out_features, self.r)
        self.lora_A = tensor.Tensor(
            shape=lora_A_shape,
            dtype=x.dtype,
            requires_grad=True,
            stores_grad=True
        )
        self.lora_B = tensor.Tensor(
            shape=lora_B_shape,
            dtype=x.dtype,
            requires_grad=True,
            stores_grad=True
        )
        std = math.sqrt(2.0 / (self.in_features + self.out_features))
        # initialize A the same way as the default for nn.Linear and B to zero
        self.lora_A.gaussian(0.0, std)
        self.lora_B.set_value(0.0)
        self.scaling = tensor.Tensor(shape=(1,), requires_grad=False, stores_grad=False)
        self.scaling.set_value(1.0 * self.lora_alpha / self.r)

    def freeze_pretrained_weight(self, freeze: bool = True):
        # freeze weights of base layer
        self.base_layer.W.requires_grad = not freeze
        self.base_layer.W.stores_grad = not freeze
        if self.base_layer.b is not None:
            self.base_layer.b.requires_grad = not freeze
            self.base_layer.b.stores_grad = not freeze
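For reference (not stated in the diff itself, but consistent with the shapes and the scaling value set in initialize()), the trainable factors follow the standard LoRA parametrization of Hu et al. (2021): the frozen base weight is augmented by a scaled low-rank product, and because lora_B is zero-initialized the wrapped layer initially computes exactly the same output as the base layer:

    $W_{\mathrm{eff}} = W + \tfrac{\text{lora\_alpha}}{r}\, B A$, with $A \in \mathbb{R}^{r \times \text{in\_features}}$ (Gaussian init) and $B \in \mathbb{R}^{\text{out\_features} \times r}$ (zero init).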

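Below is a minimal usage sketch, not part of this commit. It assumes SINGA's layer.Linear accepts an (in_features, out_features) pair, that device.get_default_device() is available, and that the class is importable as singa_peft.tuners.linear_lora (inferred from the file tree); those names are assumptions, not something this diff establishes.

# Hypothetical usage sketch (not from this commit); the import path and the
# two-argument layer.Linear constructor are assumptions.
from singa import device, layer, tensor

from singa_peft.tuners.linear_lora import LinearLoRALayer

dev = device.get_default_device()

# dummy input: batch of 4 samples with 16 features
x = tensor.Tensor(shape=(4, 16), device=dev)
x.gaussian(0.0, 1.0)

base = layer.Linear(16, 32)              # pretrained linear layer to adapt
lora = LinearLoRALayer(base, r=4, lora_alpha=8)

# initialize() freezes the base weights and creates the trainable A/B factors
lora.initialize(x)

print(lora.lora_A.shape)                 # (4, 16)  == (r, in_features)
print(lora.lora_B.shape)                 # (32, 4)  == (out_features, r)
print(base.W.requires_grad)              # False: pretrained weight is frozen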
0 commit comments
