
Commit 863f9e5

fix conv_transpose Op fp16 error test=develop (#24695) (#24784)
1 parent 627d556 commit 863f9e5

File tree

4 files changed: +158 −29 lines

  paddle/fluid/operators/conv_cudnn_helper.h
  paddle/fluid/operators/conv_transpose_cudnn_op.cu
  python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py
  python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py


paddle/fluid/operators/conv_cudnn_helper.h

Lines changed: 3 additions & 1 deletion
@@ -148,7 +148,7 @@ struct SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t> {
     }
 #endif
 
-    if (!exhaustive) {
+    if (!exhaustive && !deterministic) {
 #if CUDNN_VERSION >= 7001
       int perf_count;
       int best_algo_idx = 0;
@@ -185,6 +185,8 @@ struct SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t> {
           workspace_size_limit, &algo));
 #endif
       VLOG(3) << "choose algo " << algo;
+    } else if (deterministic) {
+      algo = static_cast<cudnnConvolutionFwdAlgo_t>(1);
     } else {
       auto& dev_ctx =
           ctx.template device_context<platform::CUDADeviceContext>();
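
This hunk reroutes deterministic runs: previously a deterministic request (with exhaustive search off) fell into the heuristic branch, which does not guarantee a deterministic algorithm. A minimal sketch of the control flow the patch establishes; the function name and the *_pick parameters are illustrative stand-ins for the real heuristic and exhaustive cuDNN queries inside SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t>::Find:

    #include <cudnn.h>

    // Sketch only: branch order after this patch. The stub parameters
    // replace the actual cuDNN queries, which need a full ConvArgs setup.
    cudnnConvolutionFwdAlgo_t ChooseFwdAlgo(
        bool exhaustive, bool deterministic,
        cudnnConvolutionFwdAlgo_t heuristic_pick,
        cudnnConvolutionFwdAlgo_t searched_pick) {
      if (!exhaustive && !deterministic) {
        // Real code: heuristic query, e.g.
        // cudnnGetConvolutionForwardAlgorithm_v7 when CUDNN_VERSION >= 7001.
        return heuristic_pick;
      } else if (deterministic) {
        // Fixed choice: enum value 1 is
        // CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM, a deterministic
        // cuDNN convolution algorithm.
        return static_cast<cudnnConvolutionFwdAlgo_t>(1);
      } else {
        // Real code: exhaustive benchmarking via
        // cudnnFindConvolutionForwardAlgorithmEx and the workspace handle.
        return searched_pick;
      }
    }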

paddle/fluid/operators/conv_transpose_cudnn_op.cu

Lines changed: 4 additions & 2 deletions
@@ -245,7 +245,8 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
     int output_offset =
         transformed_output.numel() / transformed_output.dims()[0] / groups;
     int filter_offset = filter->numel() / groups;
-    T alpha = static_cast<T>(1.0), beta = static_cast<T>(0.0);
+    ScalingParamType<T> alpha = 1.0f;
+    ScalingParamType<T> beta = 0.0f;
     auto workspace_handle = dev_ctx.cudnn_workspace_handle();
     for (int g = 0; g < groups; g++) {
       auto cudnn_func = [&](void* cudnn_workspace) {
@@ -493,7 +494,8 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
     int output_grad_offset = transformed_output_grad.numel() /
                              transformed_output_grad.dims()[0] / groups;
     int filter_offset = filter->numel() / groups;
-    T alpha = static_cast<T>(1.0), beta = static_cast<T>(0.0);
+    ScalingParamType<T> alpha = 1.0f;
+    ScalingParamType<T> beta = 0.0f;
     auto workspace_handle = dev_ctx.cudnn_workspace_handle();
     if (input_grad) {
       // Because beta is zero, it is unnecessary to reset input_grad.
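
The alpha/beta type change is the core of the fp16 fix: cuDNN reads the scaling factors as float whenever the tensor data type is half precision (and as the element type for float/double), so `T alpha` with T = float16 passes a half where a float is expected. ScalingParamType<T> comes from Paddle's cuDNN helper headers; below is a self-contained sketch of the mapping, with a stand-in float16 struct, assuming std::conditional captures the real helper's behavior:

    #include <cstdint>
    #include <type_traits>

    // Stand-in for the framework's half type (illustrative only).
    struct float16 {
      std::uint16_t x;
    };

    // Assumed sketch of the ScalingParamType idea: map the kernel element
    // type T to the scaling-factor type cuDNN expects, i.e. float for
    // half-precision tensors and T itself for float/double.
    template <typename T>
    using ScalingParamType =
        typename std::conditional<std::is_same<T, float16>::value, float,
                                  T>::type;

    static_assert(std::is_same<ScalingParamType<float16>, float>::value,
                  "half tensors scale with float alpha/beta");
    static_assert(std::is_same<ScalingParamType<double>, double>::value,
                  "full-precision tensors keep their own type");

The literals 1.0f and 0.0f then initialize the correct type for every instantiation, including the fp16 kernels exercised by the new tests below.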

python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py

Lines changed: 149 additions & 25 deletions
@@ -109,6 +109,7 @@ class TestConv2dTransposeOp(OpTest):
     def setUp(self):
         # init as conv transpose
         self.dtype = np.float64
+        self.need_check_grad = True
         self.is_test = False
         self.use_cudnn = False
         self.use_mkldnn = False
@@ -152,35 +153,40 @@ def test_check_output(self):
         self.check_output(check_dygraph=(self.use_mkldnn == False))
 
     def test_check_grad_no_input(self):
-        if self.use_cudnn:
-            place = core.CUDAPlace(0)
-            self.check_grad_with_place(
-                place, ['Filter'],
-                'Output',
-                max_relative_error=0.02,
-                no_grad_set=set(['Input']))
-        else:
-            self.check_grad(['Filter'], 'Output', no_grad_set=set(['Input']))
+        if self.need_check_grad:
+            if self.use_cudnn:
+                place = core.CUDAPlace(0)
+                self.check_grad_with_place(
+                    place, ['Filter'],
+                    'Output',
+                    max_relative_error=0.02,
+                    no_grad_set=set(['Input']))
+            else:
+                self.check_grad(
+                    ['Filter'], 'Output', no_grad_set=set(['Input']))
 
     def test_check_grad_no_filter(self):
-        if self.use_cudnn:
-            place = core.CUDAPlace(0)
-            self.check_grad_with_place(
-                place, ['Input'], 'Output', no_grad_set=set(['Filter']))
-        else:
-            self.check_grad(['Input'], 'Output', no_grad_set=set(['Filter']))
+        if self.need_check_grad:
+            if self.use_cudnn:
+                place = core.CUDAPlace(0)
+                self.check_grad_with_place(
+                    place, ['Input'], 'Output', no_grad_set=set(['Filter']))
+            else:
+                self.check_grad(
+                    ['Input'], 'Output', no_grad_set=set(['Filter']))
 
     def test_check_grad(self):
-        if self.use_cudnn:
-            place = core.CUDAPlace(0)
-            self.check_grad_with_place(
-                place,
-                set(['Input', 'Filter']),
-                'Output',
-                max_relative_error=0.02)
-        else:
-            self.check_grad(
-                set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
+        if self.need_check_grad:
+            if self.use_cudnn:
+                place = core.CUDAPlace(0)
+                self.check_grad_with_place(
+                    place,
+                    set(['Input', 'Filter']),
+                    'Output',
+                    max_relative_error=0.02)
+            else:
+                self.check_grad(
+                    set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
 
     def init_test_case(self):
         self.pad = [0, 0]
@@ -708,6 +714,124 @@ def init_test_case(self):
         self.data_format = 'NHWC'
 
 
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNN_FP16(TestConv2dTransposeOp):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 1]
+        self.stride = [1, 1]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.input_size = [2, 3, 5, 5]  # NCHW
+        f_c = self.input_size[1]
+        self.filter_size = [f_c, 6, 3, 3]
+
+    def init_op_type(self):
+        self.need_check_grad = False
+        self.use_cudnn = True
+        self.op_type = "conv2d_transpose"
+
+    def test_check_output(self):
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_output_with_place(
+                place, atol=0.02, check_dygraph=(self.use_mkldnn == False))
+        else:
+            self.check_output(check_dygraph=(self.use_mkldnn == False))
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNN_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [0, 0]
+        self.stride = [1, 1]
+        self.dilations = [1, 1]
+        self.groups = 1
+        self.input_size = [2, 5, 5, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithSymmetricPad_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 1]
+        self.stride = [1, 1]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.input_size = [2, 5, 5, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithAsymmetricPad_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 0, 2, 3]
+        self.stride = [2, 2]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.input_size = [2, 5, 5, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithStride_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 1]
+        self.stride = [2, 2]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.input_size = [2, 5, 5, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithGroups_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 1]
+        self.stride = [1, 1]
+        self.dilations = [1, 1]
+        self.groups = 2
+        self.input_size = [2, 5, 5, 4]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 3, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithEvenUpsample_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [2, 2]
+        self.stride = [2, 2]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.output_size = [14, 14]
+        self.input_size = [2, 7, 7, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 5, 5]
+        self.data_format = 'NHWC'
+
+
 class TestConv2dTransposeAPI(unittest.TestCase):
     def test_case1(self):
         data1 = fluid.layers.data(

python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py

Lines changed: 2 additions & 1 deletion
@@ -80,5 +80,6 @@
     'fused_elemwise_activation', \
     'pool2d', \
     'pool3d', \
-    'softmax'
+    'softmax', \
+    'conv2d_transpose'
 ]
