Commit b7a9835: fixes for conv padding
1 parent 05ee1a7

2 files changed: +123 -56 lines changed

tests/test_backend.py

Lines changed: 81 additions & 20 deletions
@@ -7,6 +7,7 @@
 import tempfile
 import unittest
 from collections import namedtuple
+from itertools import product

 import numpy as np
 import tensorflow as tf
@@ -16,6 +17,10 @@

 TMPPATH = tempfile.mkdtemp()

+# we can override BACKEND and OPSET from the command line, but that is too late
+# to change the behavior of annotations. If needed, pick the backend here.
+OPSET = 7
+
 BACKEND = "caffe2"
 # BACKEND = "onnxmsrt"
 # BACKEND = "onnxmsrtnext"
@@ -37,7 +42,6 @@
 _OUTPUT = "output:0"
 _OUTPUT1 = "output1:0"

-OPSET = 7

 # pylint: disable=C0111

@@ -47,6 +51,50 @@ def make_xval(shape):
     return x_val


+def get_conv_getdata(kind=1):
+    if kind == 0:
+        # generate all combinations (costly)
+        dims = [
+            ("padding", ["SAME", "VALID"]),
+            ("input_sizes", [[32, 35, 35, 288], [32, 17, 17, 1248], [1, 28, 28, 3], [32, 8, 8, 2048]]),
+            ("filter_sizes", [[1, 3, 3, 1], [1, 2, 2, 1], [1, 5, 5, 1], [1, 1, 1, 1], [1, 5, 2, 1], [1, 2, 5, 1]]),
+            ("strides", [[1, 2, 2, 1], [1, 1, 1, 1]]),
+        ]
+        values = [key_values[1] for key_values in dims]
+        for idx, v in enumerate(product(*values)):
+            if True or idx == 30:
+                yield (idx,) + v
+    elif kind == 1:
+        # some combinations that give decent padding coverage
+        data = [
+            ('SAME', [32, 35, 35, 288], [1, 3, 3, 1], [1, 2, 2, 1]),
+            ('SAME', [32, 35, 35, 288], [1, 2, 2, 1], [1, 2, 2, 1]),
+            ('SAME', [32, 35, 35, 288], [1, 2, 2, 1], [1, 1, 1, 1]),
+            ('SAME', [32, 35, 35, 288], [1, 5, 5, 1], [1, 1, 1, 1]),
+            ('SAME', [32, 35, 35, 288], [1, 1, 1, 1], [1, 2, 2, 1]),
+            ('SAME', [32, 35, 35, 288], [1, 1, 1, 1], [1, 1, 1, 1]),
+            ('SAME', [32, 35, 35, 288], [1, 5, 2, 1], [1, 2, 2, 1]),
+            ('SAME', [32, 35, 35, 288], [1, 2, 5, 1], [1, 2, 2, 1]),
+            ('SAME', [32, 35, 35, 288], [1, 2, 5, 1], [1, 1, 1, 1]),
+            ('SAME', [1, 28, 28, 3], [1, 3, 3, 1], [1, 2, 2, 1]),
+            ('SAME', [1, 28, 28, 3], [1, 3, 3, 1], [1, 1, 1, 1]),
+            ('SAME', [1, 28, 28, 3], [1, 2, 2, 1], [1, 2, 2, 1]),
+            ('SAME', [1, 28, 28, 3], [1, 2, 2, 1], [1, 1, 1, 1]),
+            ('SAME', [1, 28, 28, 3], [1, 5, 5, 1], [1, 2, 2, 1]),
+            ('SAME', [1, 28, 28, 3], [1, 5, 5, 1], [1, 1, 1, 1]),
+            ('SAME', [1, 28, 28, 3], [1, 5, 2, 1], [1, 2, 2, 1]),
+            ('SAME', [1, 28, 28, 3], [1, 2, 5, 1], [1, 1, 1, 1]),
+            ('SAME', [32, 8, 8, 2048], [1, 3, 3, 1], [1, 2, 2, 1]),
+            ('SAME', [32, 8, 8, 2048], [1, 3, 3, 1], [1, 1, 1, 1]),
+            ('VALID', [32, 35, 35, 288], [1, 3, 3, 1], [1, 1, 1, 1]),
+            ('VALID', [32, 35, 35, 288], [1, 2, 2, 1], [1, 2, 2, 1]),
+        ]
+        for idx, v in enumerate(data):
+            yield (idx,) + v
+    else:
+        raise ValueError("kind not known")
+
+
 class Tf2OnnxBackendTests(unittest.TestCase):
     def setUp(self):
         self.maxDiff = None
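
Note on the new generator: each yielded tuple is (index, padding, input_sizes, filter_sizes, strides), so the tests below can unpack one parameter set per iteration. A minimal sketch of its output (hand-written here, not part of the commit):

    for p in get_conv_getdata(kind=1):
        idx, padding, input_sizes, filter_sizes, strides = p
        # (0, 'SAME', [32, 35, 35, 288], [1, 3, 3, 1], [1, 2, 2, 1])
        # (1, 'SAME', [32, 35, 35, 288], [1, 2, 2, 1], [1, 2, 2, 1])
        # ...
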
@@ -198,20 +246,26 @@ def test_multinomial1(self):
         self.assertEqual(expected.shape, actual.shape)

     def test_maxppol(self):
-        x_val = make_xval((1, 4, 4, 1))
-        x = tf.placeholder(tf.float32, shape=x_val.shape, name=_TFINPUT)
-        mp = tf.nn.max_pool(x, [1, 2, 2, 1], _STRIDE1x1, padding="VALID")
-        output = tf.identity(mp, name=_TFOUTPUT)
-        actual, expected = self._run(output, {x: x_val}, {_INPUT: x_val})
-        self.assertAllClose(expected, actual)
+        for p in get_conv_getdata():
+            idx, padding, x_shape, ksize, strides = p
+            tf.reset_default_graph()
+            x_val = make_xval(x_shape)
+            x = tf.placeholder(tf.float32, shape=x_val.shape, name=_TFINPUT)
+            mp = tf.nn.max_pool(x, ksize, strides, padding=padding)
+            output = tf.identity(mp, name=_TFOUTPUT)
+            actual, expected = self._run(output, {x: x_val}, {_INPUT: x_val})
+            self.assertAllClose(expected, actual, err_msg=str(p))

     def test_avgppol(self):
-        x_val = make_xval((1, 4, 4, 1))
-        x = tf.placeholder(tf.float32, shape=x_val.shape, name=_TFINPUT)
-        mp = tf.nn.avg_pool(x, [1, 2, 2, 1], _STRIDE1x1, padding="VALID")
-        output = tf.identity(mp, name=_TFOUTPUT)
-        actual, expected = self._run(output, {x: x_val}, {_INPUT: x_val})
-        self.assertAllClose(expected, actual)
+        for p in get_conv_getdata(kind=0):
+            idx, padding, x_shape, ksize, strides = p
+            tf.reset_default_graph()
+            x_val = make_xval(x_shape)
+            x = tf.placeholder(tf.float32, shape=x_val.shape, name=_TFINPUT)
+            mp = tf.nn.avg_pool(x, ksize, strides, padding=padding)
+            output = tf.identity(mp, name=_TFOUTPUT)
+            actual, expected = self._run(output, {x: x_val}, {_INPUT: x_val})
+            self.assertAllClose(expected, actual, err_msg=str(p))

     def _conv_test(self, x_val, w, strides=None, padding="VALID", dilations=None):
         if strides is None:
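
For reference, the output sizes these parametrized tests exercise follow TensorFlow's conventions: SAME produces ceil(input / stride) regardless of kernel size, while VALID uses only fully valid windows. A minimal sketch of that expectation (plain Python 3, not part of the commit):

    import math

    def expected_output_size(input_size, kernel, stride, padding):
        if padding == "SAME":
            return math.ceil(input_size / stride)                  # e.g. 35, k=3, s=2 -> 18
        return math.ceil((input_size - kernel + 1) / stride)       # VALID: 35, k=3, s=2 -> 17
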
@@ -753,13 +807,20 @@ def test_pow_scalar(self):

     @unittest.skipIf(BACKEND == "caffe2", "not supported correctly in caffe2")
     def test_pad(self):
-        x_val = np.array([[1.0, 1.2], [2.3, 3.4], [4.5, 5.7]], dtype=np.float32)
-        x = tf.placeholder(tf.float32, x_val.shape, name=_TFINPUT)
-        paddings = tf.constant([[0, 0, ], [2, 0]])
-        op = tf.pad(x, paddings, "CONSTANT")
-        output = tf.identity(op, name=_TFOUTPUT)
-        actual, expected = self._run(output, {x: x_val}, {_INPUT: x_val})
-        self.assertAllClose(expected, actual)
+        params = [
+            ("CONSTANT", [[1, 1], [2, 2]], [[1.0, 1.2], [2.3, 3.4], [4.5, 5.7]]),
+            ("CONSTANT", [[0, 0], [3, 3], [3, 3], [0, 0]], np.random.randn(1, 3, 4, 5).astype(np.float32)),
+        ]
+        for p in params:
+            tf.reset_default_graph()
+            mode, pad, xv = p
+            x_val = np.array(xv, dtype=np.float32)
+            x = tf.placeholder(tf.float32, x_val.shape, name=_TFINPUT)
+            paddings = tf.constant(pad)
+            op = tf.pad(x, paddings, mode)
+            output = tf.identity(op, name=_TFOUTPUT)
+            actual, expected = self._run(output, {x: x_val}, {_INPUT: x_val})
+            self.assertAllClose(expected, actual, err_msg=str(p))

     @unittest.skipIf(BACKEND in ["caffe2", "onnxmsrt"], "not supported correctly in caffe2")
     def test_randomuniform(self):
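
Each row of the paddings constant is a (before, after) pair for one dimension, so the second case above pads only the two spatial axes of the NHWC tensor. A quick NumPy sketch of the expected shape (not part of the commit):

    import numpy as np

    x = np.zeros((1, 3, 4, 5), dtype=np.float32)
    y = np.pad(x, [[0, 0], [3, 3], [3, 3], [0, 0]], mode="constant")
    assert y.shape == (1, 9, 10, 5)   # 3+3+3 = 9, 3+4+3 = 10
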

tf2onnx/tfonnx.py

Lines changed: 42 additions & 36 deletions
@@ -304,6 +304,11 @@ def reshape_op5(ctx, node, name, args):
 HWCN_TO_NCHW = [3, 2, 0, 1]
 NCHW_TO_HWCN = [2, 3, 1, 0]

+def spatial_map(shape, perm):
+    new_shape = shape[:]
+    for i in perm:
+        new_shape[i] = shape[perm[i]]
+    return new_shape

 def conv_convert_inputs(ctx, node, with_kernel=False, new_kernel_shape=None,
                         input_indices=None, output_indices=None):
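
spatial_map permutes a shape list the same way a Transpose with the given perm permutes the tensor. A minimal sketch, assuming NHWC_TO_NCHW = [0, 3, 1, 2] as defined earlier in this file:

    NHWC_TO_NCHW = [0, 3, 1, 2]
    spatial_map([1, 28, 28, 3], NHWC_TO_NCHW)   # -> [1, 3, 28, 28], i.e. NHWC to NCHW
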
@@ -320,11 +325,6 @@ def conv_convert_inputs(ctx, node, with_kernel=False, new_kernel_shape=None,
         new_kernel_shape: reshape the kernel
     """

-    def calc_shape(a, b):
-        if a and b:
-            return [a[b[i]] for i in b]
-        return None
-
     if input_indices is None:
         input_indices = [0]
     if output_indices is None:
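
The removed calc_shape was subtly wrong: it appended a[b[i]] in iteration order instead of assigning by index, so permuted shapes came out scrambled. A hand-checked sketch, again assuming NHWC_TO_NCHW = [0, 3, 1, 2]:

    def calc_shape(a, b):                        # the helper deleted above
        if a and b:
            return [a[b[i]] for i in b]
        return None

    calc_shape([1, 28, 28, 3], [0, 3, 1, 2])     # -> [1, 28, 3, 28], not [1, 3, 28, 28]
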
@@ -335,42 +335,44 @@ def calc_shape(a, b):
     if node.is_nhwc():
         # transpose input if needed, no need to record shapes on input
         for idx in input_indices:
+            parent = node.inputs[idx]
             if node.inputs[idx].is_const():
                 # if input is a constant, transpose that one
-                parent = node.inputs[idx]
                 if not parent.data_format:
                     val = parent.get_tensor_value()
                     parent.set_tensor_value(val.transpose(NHWC_TO_NCHW))
-                    parent.data_format = "NCHW"
             else:
                 # if input comes from an op, insert transpose op
                 input_name = node.input[idx]
                 transpose = ctx.insert_new_node_on_input(node, "Transpose", input_name)
                 transpose.set_attr("perm", NHWC_TO_NCHW)
                 transpose.inserted_nchw = True
-                if idx == 0:
-                    ctx.set_shape(transpose.output[0], calc_shape(ctx.get_shape(input_name), NHWC_TO_NCHW))
+                shape = ctx.get_shape(input_name)
+                new_shape = spatial_map(shape, NHWC_TO_NCHW)
+                ctx.set_shape(transpose.output[0], new_shape)
                 nodes.append(transpose)
+            parent.data_format = "NCHW"

         # kernel needs to be transposed
         if with_kernel:
+            parent = node.inputs[1]
             if node.inputs[1].is_const():
                 # kernel is const - transpose the const
-                parent = node.inputs[1]
                 if not parent.data_format:
                     val = parent.get_tensor_value()
                     val = val.transpose(HWCN_TO_NCHW)
                     parent.set_tensor_value(val)
-                    parent.data_format = "NCHW"
             else:
                 # kernel comes from op, insert transpose op
                 input_name = node.input[1]
                 transpose = ctx.insert_new_node_on_input(node, "Transpose", input_name)
                 transpose.set_attr("perm", HWCN_TO_NCHW)
                 transpose.inserted_nchw = True
                 ctx.copy_shape(input_name, transpose.output[0])
-                ctx.set_shape(transpose.output[0], calc_shape(ctx.get_shape(input_name), HWCN_TO_NCHW))
+                new_shape = spatial_map(ctx.get_shape(input_name), HWCN_TO_NCHW)
+                ctx.set_shape(transpose.output[0], new_shape)
                 nodes.append(transpose)
+            parent.data_format = "NCHW"

         # some onnx conv ops require reshaping the kernel (i.e. depthwise_conv2d)
         if new_kernel_shape:
@@ -379,46 +381,52 @@ def calc_shape(a, b):
             input_name = node.input[1]
             reshape = ctx.insert_new_node_on_input(node, "Reshape", input_name)
             reshape.set_attr("shape", new_kernel_shape)
-            ctx.set_shape(reshape.output[0], new_kernel_shape)
         else:
             # new reshape takes new shape as input[1]
             shape_name = utils.make_name(node.name)
             shape_node = ctx.make_const(shape_name, "Const", np.array(new_kernel_shape, dtype=np.int64))
             input_name = node.input[1]
             reshape = ctx.insert_new_node_on_input(node, "Reshape", input_name)
             reshape.input.append(shape_name)
-            ctx.set_shape(reshape.output[0], new_kernel_shape)
+        ctx.set_shape(reshape.output[0], new_kernel_shape)
         nodes.append(reshape)

     # insert conv node after inputs
     nodes.append(node)

     # transpose outputs if needed
     if node.is_nhwc():
-        # TODO: what if len(output) > 0 ?
         for idx in output_indices:
             output_name = node.output[idx]
             op_name = utils.make_name(node.name)
             transpose = ctx.insert_new_node_on_output("Transpose", output_name, name=op_name)
             transpose.set_attr("perm", NCHW_TO_NHWC)
             transpose.inserted_nchw = True
-            ctx.set_shape(transpose.output[0], calc_shape(ctx.get_shape(node.output[idx]), NCHW_TO_NHWC))
+            ctx.set_shape(transpose.output[0], ctx.get_shape(node.output[idx]))
             nodes.append(transpose)
+        node.data_format = "NCHW"
     return nodes


-def add_padding(node, kernel_shape, strides):
+def add_padding(ctx, node, kernel_shape, strides, dilations=None, spatial=2):
     padding = node.get_attr("padding")
     if padding:
+        if dilations is None:
+            dilations = [1] * spatial * 2
         padding = padding.s.decode("utf-8")
         if padding == 'SAME':
-            s_h, s_w = strides[0], strides[1]
-            k_h, k_w = kernel_shape[0], kernel_shape[1]
-            p_x0 = (k_w - s_w) // 2
-            p_y0 = (k_h - s_h) // 2
-            p_x1 = k_w - s_w - p_x0
-            p_y1 = k_h - s_h - p_y0
-            node.set_attr("pads", [p_y0, p_x0, p_y1, p_x1])
+            pads = [0] * spatial * 2
+            input_shape = ctx.get_shape(node.input[0])
+            output_shape = ctx.get_shape(node.output[0])
+            if node.is_nhwc():
+                input_shape = spatial_map(input_shape, NHWC_TO_NCHW)
+                output_shape = spatial_map(output_shape, NHWC_TO_NCHW)
+            for i in range(spatial):
+                pad = (output_shape[i + 2] - 1) * strides[i] + dilations[i] * kernel_shape[i] - input_shape[i + 2]
+                pad = max(pad, 0)
+                pads[i] = pad // 2
+                pads[i + spatial] = pad - pad // 2
+            node.set_attr("pads", pads)
         elif padding == 'VALID':
             pass
         else:
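
The rewritten SAME branch derives the total pad from the actual input and output shapes, matching TensorFlow's definition (output = ceil(input / stride)); the old kernel-minus-stride rule could go wrong, e.g. kernel 2 with stride 2 on size 35 needs one pad pixel but got zero. A hand-computed instance of the new formula (not part of the commit):

    # one spatial axis: input 35, kernel 3, stride 2, dilation 1
    # TensorFlow SAME output: ceil(35 / 2) = 18
    pad = (18 - 1) * 2 + 1 * 3 - 35          # = 2
    begin, end = pad // 2, pad - pad // 2    # pads = [1, 1]
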
@@ -445,12 +453,11 @@ def conv_dims_attr(node, name, new_name=None):
     return dims


-def conv_kernel_shape(ctx, node, input_idx):
+def conv_kernel_shape(ctx, node, input_idx, spatial=2):
     kernel_shape = ctx.get_shape(node.input[1])
-    if len(kernel_shape) != 4:
-        raise ValueError("only Conv2D is supported")
-    h, w, c, n = kernel_shape
-    kernel_shape = [h, w]
+    if len(kernel_shape) != 2 * spatial:
+        raise ValueError("kernel rank must be 2 * spatial")
+    kernel_shape = kernel_shape[0:spatial]
     node.set_attr("kernel_shape", kernel_shape)
     return kernel_shape

@@ -460,11 +467,10 @@ def conv_op(ctx, node, name, args):
     # @string padding, @string data_format)
     # T Y = Conv(T X, T W, T B, @AttrType.STRING auto_pad, @AttrType.INTS dilations, @AttrType.INT group,
     #            @AttrType.INTS kernel_shape, @AttrType.INTS pads, @AttrType.INTS strides)
-    kernel_shape = conv_kernel_shape(ctx, node, 1)
+    kernel_shape = conv_kernel_shape(ctx, node, 1, spatial=2)
     strides = conv_dims_attr(node, "strides")
-    conv_dims_attr(node, "dilations")
-    add_padding(node, kernel_shape, strides)
-
+    dilations = conv_dims_attr(node, "dilations")
+    add_padding(ctx, node, kernel_shape, strides, dilations=dilations, spatial=2)
     nodes = conv_convert_inputs(ctx, node, with_kernel=True)
     return nodes
@@ -486,7 +492,7 @@ def convtranspose_op(ctx, node, name, args):
486492

487493
strides = conv_dims_attr(node, "strides")
488494
conv_dims_attr(node, "dilations")
489-
add_padding(node, kernel_shape, strides)
495+
add_padding(ctx, node, kernel_shape, strides)
490496

491497
# remove output_shapes input, swap data and kernel
492498
ctx.remove_input(node, node.input[0])
@@ -530,7 +536,7 @@ def depthwiseconv_op(ctx, node, name, args):
530536
strides = conv_dims_attr(node, "strides")
531537
conv_dims_attr(node, "dilations")
532538
node.set_attr("group", i_c)
533-
add_padding(node, kernel_shape, strides)
539+
add_padding(ctx, node, kernel_shape, strides)
534540

535541
new_kernel_shape = [k_output_channels, 1, k_h, k_w]
536542
nodes = conv_convert_inputs(ctx, node, with_kernel=True, new_kernel_shape=new_kernel_shape)
@@ -561,7 +567,7 @@ def pool_op(ctx, node, name, args):
561567

562568
conv_dims_attr(node, "dilations")
563569

564-
add_padding(node, kernel_shape, strides)
570+
add_padding(ctx, node, kernel_shape, strides)
565571

566572
nodes = conv_convert_inputs(ctx, node, with_kernel=False)
567573
return nodes
