 import paddle
 import paddle.base.dygraph as dg
 import paddle.nn.functional as F
-from paddle import base
+from paddle import base, nn


 def gelu(x, approximate):
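+    # Normalize the new string forms ("tanh"/"none") to the boolean flag
+    # used below: True selects the tanh approximation
+    #   0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x**3))),
+    # False the exact form 0.5 * x * (1 + erf(x / sqrt(2))).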
+    if approximate == "tanh":
+        approximate = True
+    if approximate == "none":
+        approximate = False
     if approximate:
         y_ref = (
             0.5
@@ -46,9 +50,14 @@ def _test_case1_cpu(self, approximate):
         place = base.CPUPlace()
         with dg.guard(place) as g:
             x_var = paddle.to_tensor(x)
-            y_var = F.gelu(x_var, approximate)
-            y_test = y_var.numpy()
-            np.testing.assert_allclose(y_ref, y_test, rtol=1e-05, atol=1e-08)
+            y_var1 = F.gelu(x_var, approximate)
+            y_test1 = y_var1.numpy()
+
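+            # the nn.GELU layer must agree with the functional API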
+            func = nn.GELU(approximate)
+            y_var2 = func(x_var)
+            y_test2 = y_var2.numpy()
+            np.testing.assert_allclose(y_ref, y_test1, rtol=1e-05, atol=1e-08)
+            np.testing.assert_allclose(y_ref, y_test2, rtol=1e-05, atol=1e-08)

     def _test_case1_gpu(self, approximate):
         x = np.random.uniform(-1, 1, size=(11, 17)).astype(np.float32)
@@ -57,12 +66,17 @@ def _test_case1_gpu(self, approximate):
         place = base.CUDAPlace(0)
         with dg.guard(place) as g:
             x_var = paddle.to_tensor(x)
-            y_var = F.gelu(x_var, approximate)
-            y_test = y_var.numpy()
-            np.testing.assert_allclose(y_ref, y_test, rtol=1e-05, atol=1e-08)
+            y_var1 = F.gelu(x_var, approximate)
+            y_test1 = y_var1.numpy()
+
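+            # same functional/Layer agreement check on the CUDA place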
+            func = nn.GELU(approximate)
+            y_var2 = func(x_var)
+            y_test2 = y_var2.numpy()
+            np.testing.assert_allclose(y_ref, y_test1, rtol=1e-05, atol=1e-08)
+            np.testing.assert_allclose(y_ref, y_test2, rtol=1e-05, atol=1e-08)

     def test_cases(self):
-        for approximate in [True, False]:
+        for approximate in [True, False, "none", "tanh"]:
             self._test_case1_cpu(approximate)
             if base.is_compiled_with_cuda():
                 self._test_case1_gpu(approximate)
@@ -86,15 +100,36 @@ def run_gelu_op(approximate):
                 x_grad = paddle.grad([y], [x], [paddle.to_tensor(y_g_np)])[0]
                 return y.numpy(), x_grad.numpy()

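+        # mirror of run_gelu_op above, driving the Layer-style API instead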
+        def run_gelu_class(approximate):
+            with dg.guard():
+                x = paddle.to_tensor(x_np)
+                x.stop_gradient = False
+                func = nn.GELU(approximate=approximate)
+                y = func(x)
+                x_grad = paddle.grad([y], [x], [paddle.to_tensor(y_g_np)])[0]
+                return y.numpy(), x_grad.numpy()
+
         use_fast_math(True)
-        y_fast_math, x_g_fast_math = run_gelu_op(True)
+        y_fast_math1, x_g_fast_math1 = run_gelu_op(True)
+        y_fast_math2, x_g_fast_math2 = run_gelu_class(True)
         use_fast_math(False)

-        y_ref, x_g_ref = run_gelu_op(True)
-        np.testing.assert_allclose(y_ref, y_fast_math, rtol=1e-05, atol=0.0005)
+        y_ref1, x_g_ref1 = run_gelu_op(True)
+        y_ref2, x_g_ref2 = run_gelu_class(True)
+        np.testing.assert_allclose(
+            y_ref1, y_fast_math1, rtol=1e-05, atol=0.0005
+        )
+
+        np.testing.assert_allclose(
+            x_g_ref1, x_g_fast_math1, rtol=1e-05, atol=0.0005
+        )
+
+        np.testing.assert_allclose(
+            y_ref2, y_fast_math2, rtol=1e-05, atol=0.0005
+        )

         np.testing.assert_allclose(
-            x_g_ref, x_g_fast_math, rtol=1e-05, atol=0.0005
+            x_g_ref2, x_g_fast_math2, rtol=1e-05, atol=0.0005
         )

@@ -105,38 +140,97 @@ def _test_case1_cpu(self, approximate):

         place = base.CPUPlace()
         with dg.guard(place) as g:
-            x_var = paddle.to_tensor(x)
-            x_var.stop_gradient = False
-            y_var = F.gelu(x_var, approximate)
-            y_test = y_var.numpy()
+            x_var1 = paddle.to_tensor(x)
+            x_var2 = paddle.to_tensor(x)
+
+            x_var1.stop_gradient = False
+            x_var2.stop_gradient = False
+
+            y_var1 = F.gelu(x_var1, approximate)
+            y_test1 = y_var1.numpy()
+
+            func = nn.GELU(approximate)
+            y_var2 = func(x_var2)
+            y_test2 = y_var2.numpy()

-            loss = paddle.sum(y_var)
-            loss.backward()
-            np.testing.assert_allclose(y_ref, y_test, rtol=1e-05, atol=1e-08)
-            np.testing.assert_allclose(x_var.grad.shape, x_var.shape)
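+            # run backward through both paths and check gradient shapes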
+            loss1 = paddle.sum(y_var1)
+            loss1.backward()
+
+            loss2 = paddle.sum(y_var2)
+            loss2.backward()
+            np.testing.assert_allclose(y_ref, y_test1, rtol=1e-05, atol=1e-08)
+            np.testing.assert_allclose(x_var1.grad.shape, x_var1.shape)
+
+            np.testing.assert_allclose(y_ref, y_test2, rtol=1e-05, atol=1e-08)
+            np.testing.assert_allclose(x_var2.grad.shape, x_var2.shape)

     def _test_case1_gpu(self, approximate):
         x = np.random.uniform(-1, 1, size=(0, 17)).astype(np.float32)
         y_ref = gelu(x, approximate)

         place = base.CUDAPlace(0)
         with dg.guard(place) as g:
-            x_var = paddle.to_tensor(x)
-            x_var.stop_gradient = False
-            y_var = F.gelu(x_var, approximate)
-            y_test = y_var.numpy()
+            x_var1 = paddle.to_tensor(x)
+            x_var2 = paddle.to_tensor(x)
+
+            x_var1.stop_gradient = False
+            x_var2.stop_gradient = False
+
+            y_var1 = F.gelu(x_var1, approximate)
+            y_test1 = y_var1.numpy()

-            loss = paddle.sum(y_var)
-            loss.backward()
-            np.testing.assert_allclose(y_ref, y_test, rtol=1e-05, atol=1e-08)
-            np.testing.assert_allclose(x_var.grad.shape, x_var.shape)
+            func = nn.GELU(approximate)
+            y_var2 = func(x_var2)
+            y_test2 = y_var2.numpy()
+
+            loss1 = paddle.sum(y_var1)
+            loss1.backward()
+
+            loss2 = paddle.sum(y_var2)
+            loss2.backward()
+            np.testing.assert_allclose(y_ref, y_test1, rtol=1e-05, atol=1e-08)
+            np.testing.assert_allclose(x_var1.grad.shape, x_var1.shape)
+
+            np.testing.assert_allclose(y_ref, y_test2, rtol=1e-05, atol=1e-08)
+            np.testing.assert_allclose(x_var2.grad.shape, x_var2.shape)

     def test_cases(self):
-        for approximate in [True, False]:
+        for approximate in [True, False, "none", "tanh"]:
             self._test_case1_cpu(approximate)
             if base.is_compiled_with_cuda():
                 self._test_case1_gpu(approximate)


+class TestGeluError(unittest.TestCase):
+
+    def setUp(self):
+        x = np.random.uniform(-1, 1, size=(11, 17)).astype(np.float32)
+        self.x = paddle.to_tensor(x)
+
+    def test_gelu_op_error(self):
+
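+        # an unrecognized string such as "tan", or a non-bool/str value
+        # like 1234, must be rejected with a TypeError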
+        def test_type_error1():
+            y = F.gelu(self.x, "tan")
+
+        def test_type_error2():
+            y = F.gelu(self.x, 1234)
+
+        self.assertRaises(TypeError, test_type_error1)
+        self.assertRaises(TypeError, test_type_error2)
+
+    def test_gelu_class_error(self):
+
+        def test_type_error1():
+            func = nn.GELU("tan")
+            y = func(self.x)
+
+        def test_type_error2():
+            func = nn.GELU(1234)
+            y = func(self.x)
+
+        self.assertRaises(TypeError, test_type_error1)
+        self.assertRaises(TypeError, test_type_error2)
+
+
 if __name__ == '__main__':
     unittest.main()