
Commit 4bf168b

add fp16 kernel for elementwise add
1 parent b3f076a commit 4bf168b

2 files changed: +51 −24 lines

paddle/fluid/operators/elementwise_add_op.cu

Lines changed: 11 additions & 10 deletions
@@ -14,19 +14,20 @@ limitations under the License. */
 
 #define EIGEN_USE_GPU
 #include "paddle/fluid/operators/elementwise_add_op.h"
+#include "paddle/fluid/platform/float16.h"
 
 namespace ops = paddle::operators;
+namespace plat = paddle::platform;
 
 REGISTER_OP_CUDA_KERNEL(
-    elementwise_add,
-    ops::ElementwiseAddKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::ElementwiseAddKernel<paddle::platform::CUDADeviceContext, double>,
-    ops::ElementwiseAddKernel<paddle::platform::CUDADeviceContext, int>,
-    ops::ElementwiseAddKernel<paddle::platform::CUDADeviceContext, int64_t>);
+    elementwise_add, ops::ElementwiseAddKernel<plat::CUDADeviceContext, float>,
+    ops::ElementwiseAddKernel<plat::CUDADeviceContext, double>,
+    ops::ElementwiseAddKernel<plat::CUDADeviceContext, int>,
+    ops::ElementwiseAddKernel<plat::CUDADeviceContext, int64_t>,
+    ops::ElementwiseAddKernel<plat::CUDADeviceContext, plat::float16>);
 REGISTER_OP_CUDA_KERNEL(
     elementwise_add_grad,
-    ops::ElementwiseAddGradKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::ElementwiseAddGradKernel<paddle::platform::CUDADeviceContext, double>,
-    ops::ElementwiseAddGradKernel<paddle::platform::CUDADeviceContext, int>,
-    ops::ElementwiseAddGradKernel<paddle::platform::CUDADeviceContext,
-                                  int64_t>);
+    ops::ElementwiseAddGradKernel<plat::CUDADeviceContext, float>,
+    ops::ElementwiseAddGradKernel<plat::CUDADeviceContext, double>,
+    ops::ElementwiseAddGradKernel<plat::CUDADeviceContext, int>,
+    ops::ElementwiseAddGradKernel<plat::CUDADeviceContext, int64_t>);
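Note that only the forward registration gains a plat::float16 instantiation; the grad kernel list stays at float/double/int/int64_t, which is why the test changes below skip the gradient checks for fp16. As a side note (not part of the commit), a minimal numpy sketch shows why the fp16 output check can use atol=1e-3 rather than exact comparison:

import numpy as np

# Same input distribution as the test below, cast to float16.
x = np.random.uniform(0.1, 1, [13, 17]).astype(np.float16)
y = np.random.uniform(0.1, 1, [13, 17]).astype(np.float16)

out_fp16 = x + y  # addition rounded to float16
ref = x.astype(np.float32) + y.astype(np.float32)  # same inputs, float32 add

# Sums stay below 2.0, where a float16 ulp is 2**-10 ~= 0.00098, so the
# worst-case rounding error is about half that and fits inside atol=1e-3.
print(np.abs(out_fp16.astype(np.float32) - ref).max())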

python/paddle/fluid/tests/unittests/test_elementwise_add_op.py

Lines changed: 40 additions & 14 deletions
@@ -13,34 +13,60 @@
 # limitations under the License.
 import unittest
 import numpy as np
+import paddle.fluid.core as core
 from op_test import OpTest
 
 
-class TestElementwiseOp(OpTest):
+class TestElementwiseAddOp(OpTest):
     def setUp(self):
         self.op_type = "elementwise_add"
+        self.dtype = np.float32
+        self.init_dtype()
+
+        x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
+        y = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
         self.inputs = {
-            'X': np.random.uniform(0.1, 1, [13, 17]).astype("float32"),
-            'Y': np.random.uniform(0.1, 1, [13, 17]).astype("float32")
+            'X': OpTest.np_dtype_to_fluid_dtype(x),
+            'Y': OpTest.np_dtype_to_fluid_dtype(y)
         }
-        self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['Y'])}
+        self.outputs = {'Out': np.add(x, y)}
 
     def test_check_output(self):
         self.check_output()
 
     def test_check_grad_normal(self):
+        if self.dtype == np.float16:
+            return
         self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.005)
 
     def test_check_grad_ingore_x(self):
+        if self.dtype == np.float16:
+            return
         self.check_grad(
             ['Y'], 'Out', max_relative_error=0.005, no_grad_set=set("X"))
 
     def test_check_grad_ingore_y(self):
+        if self.dtype == np.float16:
+            return
         self.check_grad(
             ['X'], 'Out', max_relative_error=0.005, no_grad_set=set('Y'))
 
+    def init_dtype(self):
+        pass
+
+
+class TestFP16ElementwiseAddOp(TestElementwiseAddOp):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            place = core.CUDAPlace(0)
+            if core.is_float16_supported(place):
+                self.check_output_with_place(place, atol=1e-3)
+
 
-class TestElementwiseAddOp_scalar(TestElementwiseOp):
+class TestElementwiseAddOp_scalar(TestElementwiseAddOp):
     def setUp(self):
         self.op_type = "elementwise_add"
         self.inputs = {
@@ -50,7 +76,7 @@ def setUp(self):
         self.outputs = {'Out': self.inputs['X'] + self.inputs['Y']}
 
 
-class TestElementwiseAddOp_scalar2(TestElementwiseOp):
+class TestElementwiseAddOp_scalar2(TestElementwiseAddOp):
     def setUp(self):
         self.op_type = "elementwise_add"
         self.inputs = {
@@ -60,7 +86,7 @@ def setUp(self):
         self.outputs = {'Out': self.inputs['X'] + self.inputs['Y']}
 
 
-class TestElementwiseAddOp_Vector(TestElementwiseOp):
+class TestElementwiseAddOp_Vector(TestElementwiseAddOp):
     def setUp(self):
         self.op_type = "elementwise_add"
         self.inputs = {
@@ -70,7 +96,7 @@ def setUp(self):
         self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['Y'])}
 
 
-class TestElementwiseAddOp_broadcast_0(TestElementwiseOp):
+class TestElementwiseAddOp_broadcast_0(TestElementwiseAddOp):
     def setUp(self):
         self.op_type = "elementwise_add"
         self.inputs = {
@@ -84,7 +110,7 @@ def setUp(self):
         }
 
 
-class TestElementwiseAddOp_broadcast_1(TestElementwiseOp):
+class TestElementwiseAddOp_broadcast_1(TestElementwiseAddOp):
     def setUp(self):
         self.op_type = "elementwise_add"
         self.inputs = {
@@ -98,7 +124,7 @@ def setUp(self):
         }
 
 
-class TestElementwiseAddOp_broadcast_2(TestElementwiseOp):
+class TestElementwiseAddOp_broadcast_2(TestElementwiseAddOp):
     def setUp(self):
         self.op_type = "elementwise_add"
         self.inputs = {
@@ -111,7 +137,7 @@ def setUp(self):
         }
 
 
-class TestElementwiseAddOp_broadcast_3(TestElementwiseOp):
+class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp):
     def setUp(self):
         self.op_type = "elementwise_add"
         self.inputs = {
@@ -125,7 +151,7 @@ def setUp(self):
         }
 
 
-class TestElementwiseAddOp_broadcast_4(TestElementwiseOp):
+class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp):
     def setUp(self):
         self.op_type = "elementwise_add"
         self.inputs = {
@@ -139,7 +165,7 @@ def setUp(self):
         }
 
 
-class TestElementwiseAddOp_rowwise_add_0(TestElementwiseOp):
+class TestElementwiseAddOp_rowwise_add_0(TestElementwiseAddOp):
     def setUp(self):
         self.op_type = "elementwise_add"
         self.inputs = {
@@ -153,7 +179,7 @@ def setUp(self):
         }
 
 
-class TestElementwiseAddOp_rowwise_add_1(TestElementwiseOp):
+class TestElementwiseAddOp_rowwise_add_1(TestElementwiseAddOp):
     def setUp(self):
         self.op_type = "elementwise_add"
         self.inputs = {
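Because the new init_dtype() hook is only invoked from the base class's setUp(), the fp16 variant inherits every check from TestElementwiseAddOp by overriding that single method, while the scalar/broadcast/rowwise subclasses above override setUp() wholesale and are unaffected by it. As a hypothetical illustration (not in this commit, assuming it sits in the same test module), another dtype variant would follow the same pattern:

import numpy as np

# Hypothetical example: a float64 variant that reuses every test case of
# TestElementwiseAddOp by overriding only the dtype hook.
class TestFP64ElementwiseAddOp(TestElementwiseAddOp):
    def init_dtype(self):
        self.dtype = np.float64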
