
Commit 8b16927

add fp16 support to conv3d

1 parent fd1971c · commit 8b16927

2 files changed: +89 additions, −20 deletions
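In brief: the operator change registers the cuDNN conv3d forward kernel for plat::float16 alongside the existing float and double instantiations, and the test change reworks test_conv3d_op.py so the dtype is configurable per test class, adding a forward-only fp16 variant of each cuDNN test case.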

paddle/fluid/operators/conv_cudnn_op.cu.cc

Lines changed: 2 additions & 1 deletion
@@ -366,7 +366,8 @@ REGISTER_OP_KERNEL(conv2d_grad, CUDNN, plat::CUDAPlace,

 REGISTER_OP_KERNEL(conv3d, CUDNN, plat::CUDAPlace,
                    paddle::operators::CUDNNConvOpKernel<float>,
-                   paddle::operators::CUDNNConvOpKernel<double>);
+                   paddle::operators::CUDNNConvOpKernel<double>,
+                   paddle::operators::CUDNNConvOpKernel<plat::float16>);
 REGISTER_OP_KERNEL(conv3d_grad, CUDNN, plat::CUDAPlace,
                    paddle::operators::CUDNNConvGradOpKernel<float>,
                    paddle::operators::CUDNNConvGradOpKernel<double>);
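Note that only the forward kernel gains a float16 instantiation; conv3d_grad is still registered for float and double only. This is why every fp16 test case below checks the forward output and returns early from the gradient checks.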

python/paddle/fluid/tests/unittests/test_conv3d_op.py

Lines changed: 87 additions & 19 deletions
@@ -70,30 +70,36 @@ def conv3d_forward_naive(input, filter, group, conv_param):

 class TestConv3dOp(OpTest):
     def setUp(self):
+        self.op_type = "conv3d"
         self.use_cudnn = False
+        self.dtype = np.float32
+        self.init_kernel_type()
         self.init_group()
-        self.init_op_type()
         self.init_dilation()
         self.init_test_case()

         conv3d_param = {
             'stride': self.stride,
             'pad': self.pad,
             'dilations': self.dilations,
-            'use_cudnn': self.use_cudnn,
             'data_format': 'AnyLayout'  # TODO(dzhwinter) : should be fix latter
         }
-        input = np.random.random(self.input_size).astype("float32")
-        filter = np.random.random(self.filter_size).astype("float32")
+
+        input = np.random.random(self.input_size).astype(self.dtype)
+        filter = np.random.random(self.filter_size).astype(self.dtype)
         output = conv3d_forward_naive(input, filter, self.groups,
-                                      conv3d_param).astype("float32")
+                                      conv3d_param).astype(self.dtype)

-        self.inputs = {'Input': input, 'Filter': filter}
+        self.inputs = {
+            'Input': OpTest.np_dtype_to_fluid_dtype(input),
+            'Filter': OpTest.np_dtype_to_fluid_dtype(filter)
+        }
         self.attrs = {
             'strides': self.stride,
             'paddings': self.pad,
             'groups': self.groups,
-            'dilations': self.dilations
+            'dilations': self.dilations,
+            'use_cudnn': self.use_cudnn
         }
         self.outputs = {'Output': output}

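Inputs are now routed through OpTest.np_dtype_to_fluid_dtype before being fed, and the random data plus the naive reference output follow self.dtype instead of a hard-coded "float32". As a rough sketch of what that helper presumably does in this framework (an assumption from context, not part of this commit), float16 arrays are re-viewed as uint16 bit patterns, since that is how fluid feeds half-precision data, while other dtypes pass through untouched:

import numpy as np

# Hedged sketch of OpTest.np_dtype_to_fluid_dtype (assumed behavior, not
# taken from this commit): reinterpret float16 buffers as uint16 in place,
# without converting any values; leave every other dtype alone.
def np_dtype_to_fluid_dtype(array):
    if array.dtype == np.float16:
        array.dtype = np.uint16  # same 2-byte buffer, new view
    return array

Also worth noting: 'use_cudnn' moves out of conv3d_param, which only feeds the naive reference implementation, and into self.attrs, where the operator actually reads it.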
@@ -108,6 +114,8 @@ def test_check_output(self):
         self.check_output()

     def test_check_grad(self):
+        if self.dtype == np.float16:
+            return
         if self.testcudnn():
             place = core.CUDAPlace(0)
             self.check_grad_with_place(
@@ -120,6 +128,8 @@ def test_check_grad(self):
             set(['Input', 'Filter']), 'Output', max_relative_error=0.03)

     def test_check_grad_no_filter(self):
+        if self.dtype == np.float16:
+            return
         if self.testcudnn():
             place = core.CUDAPlace(0)
             self.check_grad_with_place(
@@ -135,6 +145,8 @@ def test_check_grad_no_filter(self):
             no_grad_set=set(['Filter']))

     def test_check_grad_no_input(self):
+        if self.dtype == np.float16:
+            return
         if self.testcudnn():
             place = core.CUDAPlace(0)
             self.check_grad_with_place(
@@ -163,8 +175,8 @@ def init_dilation(self):
     def init_group(self):
         self.groups = 1

-    def init_op_type(self):
-        self.op_type = "conv3d"
+    def init_kernel_type(self):
+        pass


 class TestCase1(TestConv3dOp):
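The init_op_type hook is renamed to init_kernel_type throughout: op_type is now fixed once in setUp, and subclasses override the hook only to choose kernel properties (use_cudnn and, for the fp16 cases, dtype), with the base class providing a no-op default.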
@@ -235,34 +247,90 @@ def init_group(self):
         self.groups = 3


+#----------------Conv3dCUDNN----------------
 class TestCUDNN(TestConv3dOp):
-    def init_op_type(self):
+    def init_kernel_type(self):
         self.use_cudnn = True
-        self.op_type = "conv3d"
+
+
+class TestFP16CUDNN(TestConv3dOp):
+    def init_kernel_type(self):
+        self.use_cudnn = True
+        self.dtype = np.float16
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            place = core.CUDAPlace(0)
+            if core.is_float16_supported(place):
+                self.check_output_with_place(place, atol=2e-2)


 class TestWithGroup1CUDNN(TestWithGroup1):
-    def init_op_type(self):
+    def init_kernel_type(self):
         self.use_cudnn = True
-        self.op_type = "conv3d"
+
+
+class TestFP16WithGroup1CUDNN(TestWithGroup1):
+    def init_kernel_type(self):
+        self.use_cudnn = True
+        self.dtype = np.float16
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            place = core.CUDAPlace(0)
+            if core.is_float16_supported(place):
+                self.check_output_with_place(place, atol=2e-2)


 class TestWithGroup2CUDNN(TestWithGroup2):
-    def init_op_type(self):
+    def init_kernel_type(self):
         self.use_cudnn = True
-        self.op_type = "conv3d"
+
+
+class TestFP16WithGroup2CUDNN(TestWithGroup2):
+    def init_kernel_type(self):
+        self.use_cudnn = True
+        self.dtype = np.float16
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            place = core.CUDAPlace(0)
+            if core.is_float16_supported(place):
+                self.check_output_with_place(place, atol=2e-2)


 class TestWith1x1CUDNN(TestWith1x1):
-    def init_op_type(self):
+    def init_kernel_type(self):
         self.use_cudnn = True
-        self.op_type = "conv3d"
+
+
+class TestFP16With1x1CUDNN(TestWith1x1):
+    def init_kernel_type(self):
+        self.use_cudnn = True
+        self.dtype = np.float16
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            place = core.CUDAPlace(0)
+            if core.is_float16_supported(place):
+                self.check_output_with_place(place, atol=2e-2)


 class TestWithInput1x1Filter1x1CUDNN(TestWithInput1x1Filter1x1):
-    def init_op_type(self):
+    def init_kernel_type(self):
         self.use_cudnn = True
-        self.op_type = "conv3d"
+
+
+class TestFP16WithInput1x1Filter1x1CUDNN(TestWithInput1x1Filter1x1):
+    def init_kernel_type(self):
+        self.use_cudnn = True
+        self.dtype = np.float16
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            place = core.CUDAPlace(0)
+            if core.is_float16_supported(place):
+                self.check_output_with_place(place, atol=2e-2)


 # FIXME(typhoonzero): find a way to determine if
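Each new fp16 class follows the same template: enable cuDNN, set dtype to np.float16, and override test_check_output so it runs only on a CUDA build whose device reports float16 support, with the absolute tolerance loosened to 2e-2 to absorb half-precision rounding. Extending the pattern to another existing fixture would look like this hypothetical variant of TestCase1 (not part of the commit; it assumes the test module's usual imports, numpy as np and paddle.fluid.core as core):

class TestFP16Case1CUDNN(TestCase1):
    def init_kernel_type(self):
        self.use_cudnn = True
        self.dtype = np.float16

    def test_check_output(self):
        # Forward-only check: no float16 conv3d_grad kernel is registered.
        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
            if core.is_float16_supported(place):
                self.check_output_with_place(place, atol=2e-2)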
