
Commit 314ca22

Starrysea996 authored and maxiaolong001 committed
[API compatibility] change gelu api for paddle (PaddlePaddle#74485)
* change gelu api
* simple gelu function and class
* replace str with literal
1 parent 0d490f7 commit 314ca22

File tree

3 files changed: +172 -37 lines changed

python/paddle/nn/functional/activation.py
python/paddle/nn/layer/activation.py
test/legacy_test/test_gelu_op.py
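In short, paddle.nn.functional.gelu and paddle.nn.GELU keep accepting the boolean approximate flag but additionally take the literal strings "tanh" and "none", which are folded onto the same flag. A minimal usage sketch (illustrative only; assumes a Paddle build that includes this commit):

# Illustrative sketch of the new call forms added by this commit.
import numpy as np
import paddle
import paddle.nn.functional as F

x = paddle.to_tensor([[-1.0, 0.5], [1.0, 1.5]])

# Pre-existing boolean form.
out_bool = F.gelu(x, approximate=True)

# New string form; "tanh" maps to True and "none" maps to False.
out_str = F.gelu(x, approximate="tanh")

# Both spellings select the same tanh approximation, so the outputs match.
np.testing.assert_allclose(out_bool.numpy(), out_str.numpy())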

python/paddle/nn/functional/activation.py

Lines changed: 23 additions & 4 deletions
@@ -14,7 +14,7 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Literal
 
 import paddle
 from paddle import _C_ops, in_dynamic_mode
@@ -150,14 +150,18 @@ def elu_(x: Tensor, alpha: float = 1.0, name: str | None = None) -> Tensor:
 
 
 def gelu(
-    x: Tensor, approximate: bool = False, name: str | None = None
+    x: Tensor,
+    approximate: Literal["tanh", "none"] | bool = False,
+    name: str | None = None,
 ) -> Tensor:
     r"""
     gelu activation.
 
     The activation function of Gelu is calculated element by element. More information refers to :ref: `Gaussian Error Linear Units`.
 
-    if approximate is True
+    approximate parameter must be True, False, "tanh", "none".
+
+    if approximate is True or "tanh"
 
     .. math::
 
@@ -171,7 +175,7 @@ def gelu(
 
     Parameters:
         x (Tensor): The input Tensor with data type float32, float64.
-        approximate (bool, optional): Whether to enable approximation. Default is False.
+        approximate (str|bool, optional): Whether to enable approximation. Default is False.
         name (str|None, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
 
     Returns:
@@ -194,8 +198,23 @@ def gelu(
            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[-0.15880796,  0.34571400],
            [ 0.84119201,  1.39957154]])
+            >>> out3 = F.gelu(x, "none")
+            >>> print(out3)
+            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [[-0.15865529,  0.34573123],
+            [ 0.84134471,  1.39978933]])
+            >>> out4 = F.gelu(x, "tanh")
+            >>> print(out4)
+            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [[-0.15880796,  0.34571400],
+            [ 0.84119201,  1.39957154]])
     """
 
+    if approximate == "tanh":
+        approximate = True
+    elif approximate == "none":
+        approximate = False
+
     if in_dynamic_or_pir_mode():
         return _C_ops.gelu(x, approximate)
     else:
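The functional change is essentially the normalization visible above: the string spellings are mapped onto the existing boolean flag before the op is dispatched. A standalone sketch of that mapping (the helper name here is hypothetical; the patch inlines this logic inside F.gelu):

# Hypothetical helper mirroring the inline normalization added to F.gelu.
def _normalize_approximate(approximate):
    if approximate == "tanh":
        return True
    if approximate == "none":
        return False
    # Booleans pass through unchanged; any other value reaches the underlying
    # op unmodified and is expected to be rejected there (the new tests assert
    # a TypeError for inputs such as "tan" or 1234).
    return approximate

assert _normalize_approximate("tanh") is True
assert _normalize_approximate("none") is False
assert _normalize_approximate(False) is False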

python/paddle/nn/layer/activation.py

Lines changed: 26 additions & 4 deletions
@@ -15,7 +15,7 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Literal
 
 from paddle.framework import get_default_dtype
 
@@ -176,7 +176,9 @@ class GELU(Layer):
     r"""
     GELU Activation.
 
-    If approximate is True
+    approximate parameter must be True, False, "tanh", "none".
+
+    If approximate is True or "tanh"
 
     .. math::
 
@@ -189,7 +191,7 @@ class GELU(Layer):
        GELU(x) = 0.5 * x * (1 + erf(\frac{x}{\sqrt{2}}))
 
     Parameters:
-        approximate (bool, optional): Whether to enable approximation. Default is False.
+        approximate (str|bool, optional): Whether to enable approximation. Default is False.
         name (str|None, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.
 
@@ -208,6 +210,24 @@ class GELU(Layer):
            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[-0.15865529,  0.34573123],
            [ 0.84134471,  1.39978933]])
+            >>> m = paddle.nn.GELU(False)
+            >>> out = m(x)
+            >>> print(out)
+            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [[-0.15865529,  0.34573123],
+            [ 0.84134471,  1.39978933]])
+            >>> m = paddle.nn.GELU("none")
+            >>> out = m(x)
+            >>> print(out)
+            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [[-0.15865529,  0.34573123],
+            [ 0.84134471,  1.39978933]])
+            >>> m = paddle.nn.GELU("tanh")
+            >>> out = m(x)
+            >>> print(out)
+            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [[-0.15880796,  0.34571400],
+            [ 0.84119201,  1.39957154]])
            >>> m = paddle.nn.GELU(True)
            >>> out = m(x)
            >>> print(out)
@@ -217,7 +237,9 @@ class GELU(Layer):
     """
 
     def __init__(
-        self, approximate: bool = False, name: str | None = None
+        self,
+        approximate: Literal["tanh", "none"] | bool = False,
+        name: str | None = None,
     ) -> None:
         super().__init__()
         self._approximate = approximate
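A short layer-level sketch matching the docstring examples added above (illustrative; "none" reproduces the default erf-based GELU, while "tanh" reproduces GELU(True)):

import paddle

x = paddle.to_tensor([[-1.0, 0.5], [1.0, 1.5]])

exact = paddle.nn.GELU("none")    # same result as GELU() / GELU(False)
approx = paddle.nn.GELU("tanh")   # same result as GELU(True)

print(exact(x))
print(approx(x))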

test/legacy_test/test_gelu_op.py

Lines changed: 123 additions & 29 deletions
@@ -20,10 +20,14 @@
 import paddle
 import paddle.base.dygraph as dg
 import paddle.nn.functional as F
-from paddle import base
+from paddle import base, nn
 
 
 def gelu(x, approximate):
+    if approximate == "tanh":
+        approximate = True
+    if approximate == "none":
+        approximate = False
     if approximate:
         y_ref = (
             0.5
@@ -46,9 +50,14 @@ def _test_case1_cpu(self, approximate):
         place = base.CPUPlace()
         with dg.guard(place) as g:
             x_var = paddle.to_tensor(x)
-            y_var = F.gelu(x_var, approximate)
-            y_test = y_var.numpy()
-        np.testing.assert_allclose(y_ref, y_test, rtol=1e-05, atol=1e-08)
+            y_var1 = F.gelu(x_var, approximate)
+            y_test1 = y_var1.numpy()
+
+            func = nn.GELU(approximate)
+            y_var2 = func(x_var)
+            y_test2 = y_var2.numpy()
+        np.testing.assert_allclose(y_ref, y_test1, rtol=1e-05, atol=1e-08)
+        np.testing.assert_allclose(y_ref, y_test2, rtol=1e-05, atol=1e-08)
 
     def _test_case1_gpu(self, approximate):
         x = np.random.uniform(-1, 1, size=(11, 17)).astype(np.float32)
@@ -57,12 +66,17 @@ def _test_case1_gpu(self, approximate):
         place = base.CUDAPlace(0)
         with dg.guard(place) as g:
             x_var = paddle.to_tensor(x)
-            y_var = F.gelu(x_var, approximate)
-            y_test = y_var.numpy()
-        np.testing.assert_allclose(y_ref, y_test, rtol=1e-05, atol=1e-08)
+            y_var1 = F.gelu(x_var, approximate)
+            y_test1 = y_var1.numpy()
+
+            func = nn.GELU(approximate)
+            y_var2 = func(x_var)
+            y_test2 = y_var2.numpy()
+        np.testing.assert_allclose(y_ref, y_test1, rtol=1e-05, atol=1e-08)
+        np.testing.assert_allclose(y_ref, y_test2, rtol=1e-05, atol=1e-08)
 
     def test_cases(self):
-        for approximate in [True, False]:
+        for approximate in [True, False, "none", "tanh"]:
             self._test_case1_cpu(approximate)
             if base.is_compiled_with_cuda():
                 self._test_case1_gpu(approximate)
@@ -86,15 +100,36 @@ def run_gelu_op(approximate):
                 x_grad = paddle.grad([y], [x], [paddle.to_tensor(y_g_np)])[0]
             return y.numpy(), x_grad.numpy()
 
+        def run_gelu_class(approximate):
+            with dg.guard():
+                x = paddle.to_tensor(x_np)
+                x.stop_gradient = False
+                func = nn.GELU(approximate=approximate)
+                y = func(x)
+                x_grad = paddle.grad([y], [x], [paddle.to_tensor(y_g_np)])[0]
+            return y.numpy(), x_grad.numpy()
+
         use_fast_math(True)
-        y_fast_math, x_g_fast_math = run_gelu_op(True)
+        y_fast_math1, x_g_fast_math1 = run_gelu_op(True)
+        y_fast_math2, x_g_fast_math2 = run_gelu_class(True)
        use_fast_math(False)
 
-        y_ref, x_g_ref = run_gelu_op(True)
-        np.testing.assert_allclose(y_ref, y_fast_math, rtol=1e-05, atol=0.0005)
+        y_ref1, x_g_ref1 = run_gelu_op(True)
+        y_ref2, x_g_ref2 = run_gelu_class(True)
+        np.testing.assert_allclose(
+            y_ref1, y_fast_math1, rtol=1e-05, atol=0.0005
+        )
+
+        np.testing.assert_allclose(
+            x_g_ref1, x_g_fast_math1, rtol=1e-05, atol=0.0005
+        )
+
+        np.testing.assert_allclose(
+            y_ref2, y_fast_math2, rtol=1e-05, atol=0.0005
+        )
 
         np.testing.assert_allclose(
-            x_g_ref, x_g_fast_math, rtol=1e-05, atol=0.0005
+            x_g_ref2, x_g_fast_math2, rtol=1e-05, atol=0.0005
         )
 
 
@@ -105,38 +140,97 @@ def _test_case1_cpu(self, approximate):
 
         place = base.CPUPlace()
         with dg.guard(place) as g:
-            x_var = paddle.to_tensor(x)
-            x_var.stop_gradient = False
-            y_var = F.gelu(x_var, approximate)
-            y_test = y_var.numpy()
+            x_var1 = paddle.to_tensor(x)
+            x_var2 = paddle.to_tensor(x)
+
+            x_var1.stop_gradient = False
+            x_var2.stop_gradient = False
+
+            y_var1 = F.gelu(x_var1, approximate)
+            y_test1 = y_var1.numpy()
+
+            func = nn.GELU(approximate)
+            y_var2 = func(x_var2)
+            y_test2 = y_var2.numpy()
 
-            loss = paddle.sum(y_var)
-            loss.backward()
-        np.testing.assert_allclose(y_ref, y_test, rtol=1e-05, atol=1e-08)
-        np.testing.assert_allclose(x_var.grad.shape, x_var.shape)
+            loss1 = paddle.sum(y_var1)
+            loss1.backward()
+
+            loss2 = paddle.sum(y_var2)
+            loss2.backward()
+        np.testing.assert_allclose(y_ref, y_test1, rtol=1e-05, atol=1e-08)
+        np.testing.assert_allclose(x_var1.grad.shape, x_var1.shape)
+
+        np.testing.assert_allclose(y_ref, y_test2, rtol=1e-05, atol=1e-08)
+        np.testing.assert_allclose(x_var2.grad.shape, x_var2.shape)
 
     def _test_case1_gpu(self, approximate):
         x = np.random.uniform(-1, 1, size=(0, 17)).astype(np.float32)
         y_ref = gelu(x, approximate)
 
         place = base.CUDAPlace(0)
         with dg.guard(place) as g:
-            x_var = paddle.to_tensor(x)
-            x_var.stop_gradient = False
-            y_var = F.gelu(x_var, approximate)
-            y_test = y_var.numpy()
+            x_var1 = paddle.to_tensor(x)
+            x_var2 = paddle.to_tensor(x)
+
+            x_var1.stop_gradient = False
+            x_var2.stop_gradient = False
+
+            y_var1 = F.gelu(x_var1, approximate)
+            y_test1 = y_var1.numpy()
 
-            loss = paddle.sum(y_var)
-            loss.backward()
-        np.testing.assert_allclose(y_ref, y_test, rtol=1e-05, atol=1e-08)
-        np.testing.assert_allclose(x_var.grad.shape, x_var.shape)
+            func = nn.GELU(approximate)
+            y_var2 = func(x_var2)
+            y_test2 = y_var2.numpy()
+
+            loss1 = paddle.sum(y_var1)
+            loss1.backward()
+
+            loss2 = paddle.sum(y_var2)
+            loss2.backward()
+        np.testing.assert_allclose(y_ref, y_test1, rtol=1e-05, atol=1e-08)
+        np.testing.assert_allclose(x_var1.grad.shape, x_var1.shape)
+
+        np.testing.assert_allclose(y_ref, y_test2, rtol=1e-05, atol=1e-08)
+        np.testing.assert_allclose(x_var2.grad.shape, x_var2.shape)
 
     def test_cases(self):
-        for approximate in [True, False]:
+        for approximate in [True, False, "none", "tanh"]:
             self._test_case1_cpu(approximate)
             if base.is_compiled_with_cuda():
                 self._test_case1_gpu(approximate)
 
 
+class TestGeluError(unittest.TestCase):
+
+    def setUp(self):
+        x = np.random.uniform(-1, 1, size=(11, 17)).astype(np.float32)
+        self.x = paddle.to_tensor(x)
+
+    def test_gelu_op_error(self):
+
+        def test_type_error1():
+            y = F.gelu(self.x, "tan")
+
+        def test_type_error2():
+            y = F.gelu(self.x, 1234)
+
+        self.assertRaises(TypeError, test_type_error1)
+        self.assertRaises(TypeError, test_type_error2)
+
+    def test_gelu_class_error(self):
+
+        def test_type_error1():
+            func = nn.GELU("tan")
+            y = func(self.x)
+
+        def test_type_error2():
+            func = nn.GELU(1234)
+            y = func(self.x)
+
+        self.assertRaises(TypeError, test_type_error1)
+        self.assertRaises(TypeError, test_type_error2)
+
+
 if __name__ == '__main__':
     unittest.main()
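For reference, a NumPy sketch of what the test's gelu helper computes for each spelling, written from the formulas in the docstrings above; the scipy.special.erf import is an assumption for the exact branch, since the helper's erf-based branch is only partly visible in this diff:

import numpy as np
from scipy.special import erf  # assumption: erf backs the exact (non-approximate) branch

def gelu_reference(x, approximate=False):
    # Fold the new string spellings onto the boolean flag, as the patch does.
    if approximate == "tanh":
        approximate = True
    if approximate == "none":
        approximate = False
    if approximate:
        # tanh approximation: 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
        return 0.5 * x * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * x**3)))
    # exact form: 0.5 * x * (1 + erf(x / sqrt(2)))
    return 0.5 * x * (1.0 + erf(x / np.sqrt(2.0)))

x = np.array([[-1.0, 0.5], [1.0, 1.5]], dtype=np.float32)
print(gelu_reference(x, "tanh"))
print(gelu_reference(x, "none"))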
