
Commit c5e560d
Merge pull request #819 from RandySheriffH/rashuai/CudnnGRU
2 parents: e467d58 + 92374ac
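
This merge adds initial CudnnGRU support: tf2onnx/onnx_opset/rnn.py gains a CudnnRNN handler that rewrites TensorFlow's fused GRU-mode CudnnRNN op into per-layer ONNX GRU nodes, slicing cuDNN's flat parameter blob into W/R/B tensors; tests/test_cudnn.py adds a GPU-only unit test for a two-layer bidirectional CudnnGRU; and tf2onnx/onnx_opset/tensor.py lets the seq_len input of ReverseSequence be built dynamically from runtime shapes at opset 11 and above.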

3 files changed (+165 −18 lines)


tests/test_cudnn.py (new file: 51 additions, 0 deletions)

```python
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.

"""Unit Tests for cudnn."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np
import tensorflow as tf

from tensorflow.python.ops import init_ops
from backend_test_base import Tf2OnnxBackendTestBase
from common import check_tf_max_version, skip_tf_cpu, check_opset_min_version, unittest_main


class CudnnTests(Tf2OnnxBackendTestBase):
    """ test cudnn cases """
    @check_tf_max_version("1.15.0", "not supported in tf-2.0")
    @skip_tf_cpu("only tf_gpu can run CudnnGPU")
    @check_opset_min_version(10, "CudnnGRU")
    def test_cudnngru(self):
        """ test contrib cudnn gru """
        seq_length = 3
        batch_size = 5
        input_size = 2
        num_layers = 2
        num_units = 2
        num_dirs = 2
        x_val = np.random.randint(0, 100, [seq_length, batch_size, input_size]).astype(np.float32)
        h_val = np.random.randint(0, 100, [num_layers * num_dirs, batch_size, num_units]).astype(np.float32).reshape(
            [num_layers * num_dirs, batch_size, num_units])

        def func(x, h):
            initializer = init_ops.constant_initializer(0.5)
            cudnngru = tf.contrib.cudnn_rnn.CudnnGRU(num_layers, num_units, 'linear_input', 'bidirectional',
                                                     kernel_initializer=initializer, bias_initializer=initializer)
            cudnngru.build([seq_length, batch_size, input_size])
            outputs = cudnngru.call(x, tuple([h]))
            _ = tf.identity(outputs[0], name='output')

        feed_dict = {"input_1:0": x_val, "input_2:0": h_val}
        input_names_with_port = ["input_1:0", "input_2:0"]
        output_names_with_port = ["output:0"]
        self.run_test_case(func, feed_dict, input_names_with_port, output_names_with_port, rtol=1e-05, atol=1e-04)


if __name__ == '__main__':
    unittest_main()
```
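
For orientation, here is a small sketch (not part of the commit) of the tensor shapes the test feeds and expects, assuming cuDNN's time-major layout and the usual concatenation of the two directions in the output:

```python
# Sketch only: shapes implied by the test's constants.
seq_length, batch_size, input_size = 3, 5, 2
num_layers, num_units, num_dirs = 2, 2, 2

x_shape = (seq_length, batch_size, input_size)            # "input_1:0", time-major input
h_shape = (num_layers * num_dirs, batch_size, num_units)  # "input_2:0", initial hidden state
y_shape = (seq_length, batch_size, num_dirs * num_units)  # "output:0", both directions concatenated

print(x_shape, h_shape, y_shape)  # (3, 5, 2) (4, 5, 2) (3, 5, 4)
```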

tf2onnx/onnx_opset/rnn.py (85 additions, 1 deletion)

```diff
@@ -10,7 +10,6 @@
 from __future__ import unicode_literals
 
 import logging
-
 import numpy as np
 from tf2onnx import utils
 from tf2onnx.handler import tf_op
@@ -169,3 +168,88 @@ def replace_output(old_output, new_output):
     @classmethod
     def version_7(cls, ctx, node, **kwargs):
         cls.version_1(ctx, node, **kwargs)
+
+
+@tf_op("CudnnRNN")
+class CudnnRNN:
+    @classmethod
+    def version_10(cls, ctx, node, **kwargs):
+        x = node.input[0]
+        x_shape = ctx.get_shape(x)
+        h = node.input[1]
+        h_shape = ctx.get_shape(h)
+        p = node.input[3]
+        utils.make_sure(
+            node.attr["rnn_mode"].s == b"gru",
+            "rnn mode other than gru are not supported yet"
+        )
+        utils.make_sure(
+            node.attr["dropout"].f == 0,
+            "dropout not supported yet"
+        )
+        utils.make_sure(
+            node.attr["input_mode"].s == b"linear_input",
+            "input mode must be linear input"
+        )
+        num_dirs = 1 if node.attr["direction"].s == b"unidirectional" else 2
+        num_layers = int(h_shape[0] / num_dirs)
+        num_units = hidden_size = h_shape[2]
+        input_size = x_shape[2]
+        w_shape = [num_layers * num_dirs, 3 * hidden_size, input_size]
+        w_shape_const = ctx.make_const(utils.make_name("w_shape"), np.array(w_shape, dtype=np.int64))
+        r_shape = [num_layers * num_dirs, 3 * hidden_size, hidden_size]
+        r_shape_const = ctx.make_const(utils.make_name("r_shape"), np.array(r_shape, dtype=np.int64))
+        b_shape = [num_layers * num_dirs, 6 * hidden_size]
+        b_shape_const = ctx.make_const(utils.make_name("b_shape"), np.array(b_shape, dtype=np.int64))
+        zero_const = ctx.make_const(utils.make_name("zero"), np.array([0], dtype=np.int64))
+        w_end = np.prod(w_shape)
+        w_end_const = ctx.make_const(utils.make_name("w_end"), np.array([w_end], dtype=np.int64))
+        r_end = w_end + np.prod(r_shape)
+        r_end_const = ctx.make_const(utils.make_name("r_end"), np.array([r_end], dtype=np.int64))
+        b_end = r_end + np.prod(b_shape)
+        b_end_const = ctx.make_const(utils.make_name("b_end"), np.array([b_end], dtype=np.int64))
+
+        def name(nm):
+            return node.name + "_" + nm
+
+        ws = [name('W_' + str(i)) for i in range(num_layers * num_dirs)]
+        rs = [name('R_' + str(i)) for i in range(num_layers * num_dirs)]
+        bs = [name('B_' + str(i)) for i in range(num_layers * num_dirs)]
+        hs = [name('H_' + str(i)) for i in range(num_layers * num_dirs)]
+        yhs = [name('YH_' + str(i)) for i in range(num_layers * num_dirs)]
+        w_flattened = ctx.make_node('Slice', [p, zero_const.output[0], w_end_const.output[0]])
+        r_flattened = ctx.make_node('Slice', [p, w_end_const.output[0], r_end_const.output[0]])
+        b_flattened = ctx.make_node('Slice', [p, r_end_const.output[0], b_end_const.output[0]])
+        w = utils.make_name('W')
+        r = utils.make_name('R')
+        b = utils.make_name('B')
+        ctx.make_node('Reshape', [w_flattened.output[0], w_shape_const.output[0]], outputs=[w])
+        ctx.make_node('Reshape', [r_flattened.output[0], r_shape_const.output[0]], outputs=[r])
+        ctx.make_node('Reshape', [b_flattened.output[0], b_shape_const.output[0]], outputs=[b])
+        ctx.make_node('Split', [w], outputs=ws)
+        ctx.make_node('Split', [r], outputs=rs)
+        ctx.make_node('Split', [b], outputs=bs)
+        ctx.make_node('Split', [h], outputs=hs)
+        xnf = xnb = x
+        for i in range(num_layers):
+            suffix = '_' + str(i * num_dirs)
+            ctx.make_node('GRU',
+                          [xnf, name('W' + suffix), name('R' + suffix), name('B' + suffix), '', name('H' + suffix)],
+                          outputs=[name('Y' + suffix), name('YH' + suffix)],
+                          attr={'direction': 'forward', 'hidden_size': num_units})
+            xnf = name(x + suffix)
+            ctx.make_node('Squeeze', [name('Y' + suffix)], outputs=[xnf], attr={'axes': [1]})
+            if num_dirs == 2:
+                suffix = '_' + str(i * 2 + 1)
+                ctx.make_node('GRU',
+                              [xnb, name('W' + suffix), name('R' + suffix), name('B' + suffix), '', name('H' + suffix)],
+                              outputs=[name('Y' + suffix), name('YH' + suffix)],
+                              attr={'direction': 'reverse', 'hidden_size': num_units})
+                xnb = name(x + suffix)
+                ctx.make_node('Squeeze', [name('Y' + suffix)], outputs=[xnb], attr={'axes': [1]})
+        ctx.remove_node(node.name)
+        if num_dirs == 2:
+            ctx.make_node('Concat', [xnf, xnb], outputs=[node.output[0]], attr={'axis': -1})
+        else:
+            ctx.make_node('Identity', [xnf], outputs=[node.output[0]])
+        ctx.make_node('Concat', yhs, outputs=[node.output[1]], attr={'axis': 0})
```
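
The new handler recovers per-layer GRU weights by slicing cuDNN's single flattened parameter buffer at offsets derived from the layer count, direction count, and sizes, then reshaping and splitting each tensor along axis 0 into one single-direction slice per GRU node. A quick sanity check of that offset arithmetic, using the dimensions from the new test (a sketch, not part of the commit):

```python
import numpy as np

# Dimensions from tests/test_cudnn.py: 2 bidirectional layers, 2 units, input size 2.
num_layers, num_dirs, hidden_size, input_size = 2, 2, 2, 2

# Shapes mirror the handler: ONNX GRU packs 3 gates into W/R and 6 bias vectors into B.
w_shape = [num_layers * num_dirs, 3 * hidden_size, input_size]
r_shape = [num_layers * num_dirs, 3 * hidden_size, hidden_size]
b_shape = [num_layers * num_dirs, 6 * hidden_size]

w_end = int(np.prod(w_shape))          # 48  -> params[0:48]  are reshaped into W
r_end = w_end + int(np.prod(r_shape))  # 96  -> params[48:96] are reshaped into R
b_end = r_end + int(np.prod(b_shape))  # 144 -> params[96:144] are reshaped into B
print(w_end, r_end, b_end)             # 48 96 144
```

Each reshaped tensor is then Split along axis 0 into num_layers * num_dirs slices of leading dimension 1, matching the [num_directions, ...] layout ONNX GRU expects.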

tf2onnx/onnx_opset/tensor.py (29 additions, 17 deletions)

```diff
@@ -1653,23 +1653,35 @@ def version_10(cls, ctx, node, **kwargs):
         inputs = [new_node.output[0]]
 
         # Add a Constant node (seq_len) for ReverseSequence.
-
-        # Index 1 for the shape should not return 0
-        # since the input must have rank >= 2.
-        rs_batch_size = ctx.get_shape(inputs[-1])[1]
-
-        # Make sure rs_batch_size and input_shape[axis] are not -1 each
-        utils.make_sure(input_shape[axis] is not -1 \
-            , "shape of axis {} is unknown".format(axis))
-        utils.make_sure(rs_batch_size is not -1 \
-            , "ReverseSequence batch size for axis {} is unknown".format(axis))
-
-        seq_list = [input_shape[axis]] * rs_batch_size
-        seq_array = np.asarray(seq_list, dtype=np.int64)  # dtype should be int64
-
-        const_seq_name = utils.make_name(const_name_root)
-        new_node = ctx.make_const(name=const_seq_name, np_val=seq_array)
-        inputs.append(new_node.output[0])
+        if ctx.opset >= 11:
+            batch_shape = ctx.make_node("Shape", [inputs[-1]])
+            const_one = ctx.make_const(utils.make_name(node.name + "_const_one"), np.array([1], dtype=np.int64))
+            const_two = ctx.make_const(utils.make_name(node.name + "_const_two"), np.array([2], dtype=np.int64))
+            batch_size = ctx.make_node("Slice",
+                                       [batch_shape.output[0], const_one.output[0], const_two.output[0]])
+            input_shape = ctx.make_node("Shape", [node.input[0]])
+            const_axis = ctx.make_const(utils.make_name(node.name + "_const_axis"),
+                                        np.array([axis], dtype=np.int64))
+            const_axis_next = ctx.make_const(utils.make_name(node.name + "_const_axis_next"),
+                                             np.array([axis + 1], dtype=np.int64))
+            input_axis = ctx.make_node("Slice",
+                                       [input_shape.output[0], const_axis.output[0], const_axis_next.output[0]])
+            seq_array = ctx.make_node("Expand", [input_axis.output[0], batch_size.output[0]])
+            inputs.append(seq_array.output[0])
+        else:
+            # Index 1 for the shape should not return 0
+            # since the input must have rank >= 2.
+            rs_batch_size = ctx.get_shape(inputs[-1])[1]
+            # Make sure rs_batch_size and input_shape[axis] are not -1 each
+            utils.make_sure(input_shape[axis] is not -1 \
+                , "shape of axis {} is unknown".format(axis))
+            utils.make_sure(rs_batch_size is not -1 \
+                , "ReverseSequence batch size for axis {} is unknown".format(axis))
+            seq_list = [input_shape[axis]] * rs_batch_size
+            seq_array = np.asarray(seq_list, dtype=np.int64)  # dtype should be int64
+            const_seq_name = utils.make_name(const_name_root)
+            new_node = ctx.make_const(name=const_seq_name, np_val=seq_array)
+            inputs.append(new_node.output[0])
 
         # Add a ReverseSequence node.
```
