Commit 8e73101

Merge pull request #9143 from kexinzhao/numpy_conv2d_pool2d_fp16
Add float16 support for cudnn conv2d
2 parents: c0511c3 + e967d19

File tree: 5 files changed (+149, −14 lines)


paddle/fluid/operators/conv_cudnn_op.cu.cc

Lines changed: 12 additions & 7 deletions
@@ -18,6 +18,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/conv_op.h"
 #include "paddle/fluid/platform/assert.h"
 #include "paddle/fluid/platform/cudnn_helper.h"
+#include "paddle/fluid/platform/float16.h"

 namespace paddle {
 namespace operators {
@@ -133,7 +134,8 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
     platform::CUDAPlace gpu = boost::get<platform::CUDAPlace>(ctx.GetPlace());
     cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes);
     // ------------------- cudnn conv forward ---------------------
-    T alpha = 1.0f, beta = 0.0f;
+    typename platform::CudnnDataType<T>::ScalingParamType alpha = 1.0f,
+                                                          beta = 0.0f;
     for (int i = 0; i < groups; i++) {
       PADDLE_ENFORCE(platform::dynload::cudnnConvolutionForward(
           handle, &alpha, cudnn_input_desc, input_data + i * group_offset_in,
@@ -280,7 +282,8 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
     platform::CUDAPlace gpu = boost::get<platform::CUDAPlace>(ctx.GetPlace());
     cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes);
     // ------------------- cudnn conv backward data ---------------------
-    T alpha = 1.0f, beta = 0.0f;
+    typename platform::CudnnDataType<T>::ScalingParamType alpha = 1.0f,
+                                                          beta = 0.0f;
     if (input_grad) {
       T* input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());
       // Because beta is zero, it is unnecessary to reset input_grad.
@@ -315,16 +318,18 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
 }  // namespace operators
 }  // namespace paddle

-REGISTER_OP_KERNEL(conv2d, CUDNN, ::paddle::platform::CUDAPlace,
+namespace plat = paddle::platform;
+REGISTER_OP_KERNEL(conv2d, CUDNN, plat::CUDAPlace,
                    paddle::operators::CUDNNConvOpKernel<float>,
-                   paddle::operators::CUDNNConvOpKernel<double>);
-REGISTER_OP_KERNEL(conv2d_grad, CUDNN, ::paddle::platform::CUDAPlace,
+                   paddle::operators::CUDNNConvOpKernel<double>,
+                   paddle::operators::CUDNNConvOpKernel<plat::float16>);
+REGISTER_OP_KERNEL(conv2d_grad, CUDNN, plat::CUDAPlace,
                    paddle::operators::CUDNNConvGradOpKernel<float>,
                    paddle::operators::CUDNNConvGradOpKernel<double>);

-REGISTER_OP_KERNEL(conv3d, CUDNN, ::paddle::platform::CUDAPlace,
+REGISTER_OP_KERNEL(conv3d, CUDNN, plat::CUDAPlace,
                    paddle::operators::CUDNNConvOpKernel<float>,
                    paddle::operators::CUDNNConvOpKernel<double>);
-REGISTER_OP_KERNEL(conv3d_grad, CUDNN, ::paddle::platform::CUDAPlace,
+REGISTER_OP_KERNEL(conv3d_grad, CUDNN, plat::CUDAPlace,
                    paddle::operators::CUDNNConvGradOpKernel<float>,
                    paddle::operators::CUDNNConvGradOpKernel<double>);
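The key change in this file is that the cuDNN scaling factors are no longer declared as T: for float16 tensors cuDNN still expects float alpha/beta, so the kernel now pulls the scalar type from platform::CudnnDataType<T>::ScalingParamType. The standalone sketch below mirrors that trait pattern only; CudnnDataType, half_t, and LaunchConvForward here are simplified stand-ins for illustration, not the actual Paddle code.

#include <iostream>

// Hypothetical placeholder for a 16-bit float type (stands in for platform::float16).
struct half_t { unsigned short bits; };

// Simplified stand-in for the CudnnDataType trait: the scaling parameter type
// stays float for both float and half tensors, matching cuDNN's convention.
template <typename T> struct CudnnDataType;
template <> struct CudnnDataType<float>  { using ScalingParamType = float; };
template <> struct CudnnDataType<double> { using ScalingParamType = double; };
template <> struct CudnnDataType<half_t> { using ScalingParamType = float; };

template <typename T>
void LaunchConvForward() {
  // Before the patch the kernel declared `T alpha`, which would hand cuDNN a
  // half-precision scalar for float16 kernels; now the scalar type follows the trait.
  typename CudnnDataType<T>::ScalingParamType alpha = 1.0f, beta = 0.0f;
  std::cout << "alpha/beta occupy " << sizeof(alpha) << " bytes\n";
  (void)beta;
}

int main() {
  LaunchConvForward<float>();   // 4-byte float scalars
  LaunchConvForward<half_t>();  // still 4-byte float scalars, not 2-byte half
  LaunchConvForward<double>();  // 8-byte double scalars
}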

paddle/fluid/operators/conv_op.cc

Lines changed: 14 additions & 3 deletions
@@ -83,12 +83,23 @@ framework::OpKernelType ConvOp::GetExpectedKernelType(
   }
 #endif

+  auto input_data_type =
+      framework::ToDataType(ctx.Input<Tensor>("Input")->type());
+  auto filter_data_type =
+      framework::ToDataType(ctx.Input<Tensor>("Filter")->type());
+  PADDLE_ENFORCE_EQ(input_data_type, filter_data_type,
+                    "input and filter data type should be consistent");
+
+  if (input_data_type == framework::proto::VarType::FP16) {
+    PADDLE_ENFORCE_EQ(library_, framework::LibraryType::kCUDNN,
+                      "float16 can only be used when CUDNN is used");
+  }
+
   std::string data_format = ctx.Attr<std::string>("data_format");
   // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
   framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
-  return framework::OpKernelType(
-      framework::ToDataType(ctx.Input<Tensor>("Input")->type()), ctx.GetPlace(),
-      layout_, library_);
+  return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_,
+                                 library_);
 }

 Conv2DOpMaker::Conv2DOpMaker(OpProto* proto, OpAttrChecker* op_checker)

paddle/fluid/platform/cudnn_helper.h

Lines changed: 17 additions & 0 deletions
@@ -19,6 +19,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/platform/dynload/cudnn.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/float16.h"
 #include "paddle/fluid/platform/macros.h"

 namespace paddle {
@@ -80,6 +81,22 @@ enum class PoolingMode {
 template <typename T>
 class CudnnDataType;

+template <>
+class CudnnDataType<float16> {
+ public:
+  static const cudnnDataType_t type = CUDNN_DATA_HALF;
+  // The scaling param type is float for HALF and FLOAT tensors
+  typedef const float ScalingParamType;
+  static ScalingParamType* kOne() {
+    static ScalingParamType v = 1.0;
+    return &v;
+  }
+  static ScalingParamType* kZero() {
+    static ScalingParamType v = 0.0;
+    return &v;
+  }
+};
+
 template <>
 class CudnnDataType<float> {
  public:
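The new specialization maps platform::float16 to CUDNN_DATA_HALF while keeping float scaling parameters, and kOne()/kZero() hand out pointers to function-local statics so callers can pass them straight into cuDNN calls that take alpha/beta by pointer. Below is a self-contained sketch of that accessor pattern under stated assumptions: the enum, the ApplyScaling function, and the trimmed float16 struct are stand-ins, not real cuDNN or Paddle symbols.

#include <cstdio>

// Stand-ins for the cuDNN data-type enum and a cuDNN-style API that takes
// scaling factors by const pointer (as cudnnConvolutionForward does).
enum FakeCudnnDataType { FAKE_DATA_FLOAT, FAKE_DATA_HALF };

struct float16 { unsigned short bits; };  // simplified placeholder

template <typename T> class CudnnDataType;

template <>
class CudnnDataType<float16> {
 public:
  static const FakeCudnnDataType type = FAKE_DATA_HALF;
  // cuDNN expects float (not half) scaling factors for HALF tensors.
  typedef const float ScalingParamType;
  static ScalingParamType* kOne()  { static ScalingParamType v = 1.0f; return &v; }
  static ScalingParamType* kZero() { static ScalingParamType v = 0.0f; return &v; }
};

// A caller can pass the shared constants directly, mimicking
// cudnnConvolutionForward(handle, kOne(), ..., kZero(), ...).
void ApplyScaling(const float* alpha, const float* beta) {
  std::printf("alpha=%.1f beta=%.1f\n", *alpha, *beta);
}

int main() {
  using DT = CudnnDataType<float16>;
  ApplyScaling(DT::kOne(), DT::kZero());
}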

python/paddle/fluid/tests/unittests/op_test.py

Lines changed: 22 additions & 0 deletions
@@ -469,6 +469,28 @@ def _numpy_to_lod_tensor(np_value, lod, place):
         tensor.set_lod(lod)
         return tensor

+    @staticmethod
+    def np_dtype_to_fluid_dtype(input):
+        """Change the dtype of a float16 numpy array.
+
+        numpy float16 is bound to paddle::platform::float16
+        in tensor_py.h with the help of the uint16 data type, since
+        the internal memory representation of float16 is
+        uint16_t in paddle and np.uint16 in numpy, and the two are
+        themselves bound together by pybind.
+
+        Args:
+            input: input numpy array
+
+        Returns:
+            input: if the dtype of input is np.float16, its dtype will be
+                changed to np.uint16 so that the internal memory is
+                reinterpreted as np.uint16 data.
+        """
+        if input.dtype == np.float16:
+            input.dtype = np.uint16
+        return input
+
     def _get_gradient(self, input_to_check, place, output_names, no_grad_set):
         prog = Program()
         block = prog.global_block()
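The docstring explains the trick: numpy's float16 and paddle::platform::float16 carry the same 16-bit payload, so viewing the array as np.uint16 merely relabels the buffer without touching the bits. A small C++ sketch of that idea follows; the float16 struct here is a simplified stand-in for paddle::platform::float16, which the docstring says stores its value as uint16_t.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Minimal stand-in for paddle::platform::float16: the value is just the raw
// 16 bits kept in a uint16_t (the real struct also has conversion operators).
struct float16 {
  uint16_t x;
};

int main() {
  // 0x3C00 is the IEEE-754 binary16 encoding of 1.0.
  uint16_t raw = 0x3C00;

  // Copying the bits into the struct (what the np.float16 -> np.uint16
  // reinterpretation on the Python side relies on) loses nothing, because
  // both sides agree the payload is the same 16 bits.
  float16 h;
  std::memcpy(&h.x, &raw, sizeof(raw));
  std::printf("bits round-trip: 0x%04X\n", h.x);  // prints 0x3C00
}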

python/paddle/fluid/tests/unittests/test_conv2d_op.py

Lines changed: 84 additions & 4 deletions
@@ -68,19 +68,24 @@ def setUp(self):
         self.init_op_type()
         self.init_group()
         self.init_dilation()
+        self.init_data_type()
         self.init_test_case()

         conv2d_param = {
             'stride': self.stride,
             'pad': self.pad,
             'dilation': self.dilations
         }
-        input = np.random.random(self.input_size).astype("float32")
-        filter = np.random.random(self.filter_size).astype("float32")
+
+        input = np.random.random(self.input_size).astype(self.dtype)
+        filter = np.random.random(self.filter_size).astype(self.dtype)
         output = conv2d_forward_naive(input, filter, self.groups,
-                                      conv2d_param).astype('float32')
+                                      conv2d_param).astype(self.dtype)

-        self.inputs = {'Input': input, 'Filter': filter}
+        self.inputs = {
+            'Input': OpTest.np_dtype_to_fluid_dtype(input),
+            'Filter': OpTest.np_dtype_to_fluid_dtype(filter)
+        }
         self.attrs = {
             'strides': self.stride,
             'paddings': self.pad,
@@ -99,6 +104,8 @@ def test_check_output(self):
         self.check_output()

     def test_check_grad(self):
+        if self.dtype == np.float16:
+            return
         if self.use_cudnn:
             place = core.CUDAPlace(0)
             self.check_grad_with_place(
@@ -111,6 +118,8 @@ def test_check_grad(self):
                 set(['Input', 'Filter']), 'Output', max_relative_error=0.02)

     def test_check_grad_no_filter(self):
+        if self.dtype == np.float16:
+            return
         if self.use_cudnn:
             place = core.CUDAPlace(0)
             self.check_grad_with_place(
@@ -126,6 +135,8 @@ def test_check_grad_no_filter(self):
                 no_grad_set=set(['Filter']))

     def test_check_grad_no_input(self):
+        if self.dtype == np.float16:
+            return
         if self.use_cudnn:
             place = core.CUDAPlace(0)
             self.check_grad_with_place(
@@ -148,6 +159,9 @@ def init_test_case(self):
         f_c = self.input_size[1] / self.groups
         self.filter_size = [6, f_c, 3, 3]

+    def init_data_type(self):
+        self.dtype = np.float32
+
     def init_dilation(self):
         self.dilations = [1, 1]

@@ -232,36 +246,102 @@ def init_op_type(self):
         self.op_type = "conv2d"


+class TestFP16CUDNN(TestCUDNN):
+    def init_data_type(self):
+        self.dtype = np.float16
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            place = core.CUDAPlace(0)
+            if core.is_float16_supported(place):
+                self.check_output_with_place(place, atol=2e-2)
+
+
 class TestCUDNNWithPad(TestWithPad):
     def init_op_type(self):
         self.use_cudnn = True
         self.op_type = "conv2d"


+class TestFP16CUDNNWithPad(TestCUDNNWithPad):
+    def init_data_type(self):
+        self.dtype = np.float16
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            place = core.CUDAPlace(0)
+            if core.is_float16_supported(place):
+                self.check_output_with_place(place, atol=2e-2)
+
+
 class TestCUDNNWithStride(TestWithStride):
     def init_op_type(self):
         self.use_cudnn = True
         self.op_type = "conv2d"


+class TestFP16CUDNNWithStride(TestCUDNNWithStride):
+    def init_data_type(self):
+        self.dtype = np.float16
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            place = core.CUDAPlace(0)
+            if core.is_float16_supported(place):
+                self.check_output_with_place(place, atol=2e-2)
+
+
 class TestCUDNNWithGroup(TestWithGroup):
     def init_op_type(self):
         self.use_cudnn = True
         self.op_type = "conv2d"


+class TestFP16CUDNNWithGroup(TestCUDNNWithGroup):
+    def init_data_type(self):
+        self.dtype = np.float16
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            place = core.CUDAPlace(0)
+            if core.is_float16_supported(place):
+                self.check_output_with_place(place, atol=2e-2)
+
+
 class TestCUDNNWith1x1(TestWith1x1):
     def init_op_type(self):
         self.use_cudnn = True
         self.op_type = "conv2d"


+class TestFP16CUDNNWith1x1(TestCUDNNWith1x1):
+    def init_data_type(self):
+        self.dtype = np.float16
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            place = core.CUDAPlace(0)
+            if core.is_float16_supported(place):
+                self.check_output_with_place(place, atol=2e-2)
+
+
 class TestCUDNNWithInput1x1Filter1x1(TestWithInput1x1Filter1x1):
     def init_op_type(self):
         self.use_cudnn = True
         self.op_type = "conv2d"


+class TestFP16CUDNNWithInput1x1Filter1x1(TestCUDNNWithInput1x1Filter1x1):
+    def init_data_type(self):
+        self.dtype = np.float16
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            place = core.CUDAPlace(0)
+            if core.is_float16_supported(place):
+                self.check_output_with_place(place, atol=2e-2)
+
+
 class TestDepthwiseConv(TestConv2dOp):
     def init_test_case(self):
         self.pad = [1, 1]
