Tom/keras gru (#1692)

TomWildenhain-Microsoft · guschmue · web-flow · commit 186b9540d705 · 2021-09-03T22:05:10.000Z
* Implement GRU rewriter for tf2

Signed-off-by: Tom Wildenhain <tomwi@microsoft.com>

* Extend GRU rewriter for keras pattern

Signed-off-by: Tom Wildenhain <tomwi@microsoft.com>

* Fix style and GRU test

Signed-off-by: Tom Wildenhain <tomwi@microsoft.com>

* Remove no_loops_in_tf2 assertion from keras tests for now

Signed-off-by: Tom Wildenhain <tomwi@microsoft.com>

Co-authored-by: Guenther Schmuelling <guschmue@microsoft.com>
diff --git a/tests/keras2onnx_unit_tests/test_layers.py b/tests/keras2onnx_unit_tests/test_layers.py
@@ -7,6 +7,7 @@
 from mock_keras2onnx.proto import (keras, is_tf_keras,
                                    is_tensorflow_older_than, is_tensorflow_later_than,
                                    is_keras_older_than, is_keras_later_than)
+from test_utils import no_loops_in_tf2
 
 K = keras.backend
 Activation = keras.layers.Activation
@@ -1864,7 +1865,7 @@ def test_GRU_2(runner):
     onnx_model = convert_keras(model, name=model.name)
     data = np.random.rand(3, 257).astype(np.float32).reshape((3, 1, 257))
     expected = model.predict(data)
-    runner(onnx_model.graph.name, onnx_model, data, expected)
+    assert runner(onnx_model.graph.name, onnx_model, data, expected)
 
 
 @pytest.mark.parametrize('return_sequences', [False, True])
diff --git a/tests/keras2onnx_unit_tests/test_utils.py b/tests/keras2onnx_unit_tests/test_utils.py
@@ -157,6 +157,10 @@ def parse_profile_results(sess_time, kernel_time_only=False, threshold=0):
     return results
 
 
+def no_loops_in_tf2(onnx_model):
+    return not is_tf2 or all(n.op_type != "Loop" for n in onnx_model.graph.node)
+
+
 def run_onnx_runtime(case_name, onnx_model, data, expected, model_files, rtol=1.e-3, atol=1.e-6,
                      compare_perf=False, enable_profiling=False):
     if not os.path.exists(tmp_path):
diff --git a/tests/test_gru.py b/tests/test_gru.py
@@ -9,11 +9,29 @@
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import variable_scope
 from backend_test_base import Tf2OnnxBackendTestBase
-from common import unittest_main, check_gru_count, check_opset_after_tf_version, check_op_count
+from common import unittest_main, check_gru_count, check_opset_after_tf_version, check_op_count, check_tf_min_version
 from tf2onnx.tf_loader import is_tf2
 
 # pylint: disable=missing-docstring,invalid-name,unused-argument,using-constant-test,cell-var-from-loop
 
+# names for input and outputs for tests
+_TFINPUT = "input"
+_INPUT = "input:0"
+_TFINPUT1 = "input1"
+_INPUT1 = "input1:0"
+_TFINPUT2 = "input2"
+_INPUT2 = "input2:0"
+_TFINPUT3 = "input3"
+_INPUT3 = "input3:0"
+_TFOUTPUT = "output"
+_OUTPUT = "output:0"
+_TFOUTPUT1 = "output1"
+_OUTPUT1 = "output1:0"
+_TFOUTPUT2 = "output2"
+_OUTPUT2 = "output2:0"
+_TFOUTPUT3 = "output3"
+_OUTPUT3 = "output3:0"
+
 if is_tf2():
     # There is no LSTMBlockCell in tf-2.x
     BasicLSTMCell = tf.compat.v1.nn.rnn_cell.BasicLSTMCell
@@ -696,6 +714,23 @@ def func(x, y1, y2):
         self.run_test_case(func, feed_dict, input_names_with_port, output_names_with_port, rtol=1e-3, atol=1e-06)
         # graph_validator=lambda g: check_gru_count(g, 2))
 
+    @check_tf_min_version("2.0")
+    def test_keras_gru(self):
+        in_shape = [10, 3]
+        x_val = np.random.uniform(size=[2, 10, 3]).astype(np.float32)
+
+        model_in = tf.keras.layers.Input(tuple(in_shape), batch_size=2)
+        x = tf.keras.layers.GRU(5, return_sequences=True, return_state=True,
+                                kernel_initializer=tf.random_uniform_initializer(0.0, 1.0, seed=42),
+                                recurrent_initializer=tf.random_uniform_initializer(0.0, 1.0, seed=44),
+                                bias_initializer=tf.random_uniform_initializer(0.0, 1.0, seed=43))(model_in)
+        model = tf.keras.models.Model(inputs=model_in, outputs=x)
+
+        def func(x):
+            y = model(x)
+            return tf.identity(y[0], name=_TFOUTPUT), tf.identity(y[1], name=_TFOUTPUT1)
+        self.run_test_case(func, {_INPUT: x_val}, [], [_OUTPUT, _OUTPUT1], rtol=1e-05, atol=1e-06)
+
 
 if __name__ == '__main__':
     unittest_main()
diff --git a/tf2onnx/rewriter/gru_rewriter.py b/tf2onnx/rewriter/gru_rewriter.py
@@ -139,10 +139,40 @@ def is_valid(self, context):
             return False
         return True
 
+    def _make_constants(self, context, W_zrh, R_zrh, B_zrh):
+        input_size = W_zrh.shape[-1]
+        hidden_size = R_zrh.shape[-1]
+        w_name = utils.make_name("W")
+        w_node = self.g.make_const(w_name, W_zrh, skip_conversion=True)
+
+        r_name = utils.make_name("R")
+        r_node = self.g.make_const(r_name, R_zrh, skip_conversion=True)
+
+        b_name = utils.make_name("B")
+        b_node = self.g.make_const(b_name, B_zrh, skip_conversion=True)
+
+        context.input_size = input_size
+        context.hidden_size = hidden_size
+        context.onnx_input_ids["W"] = w_node.output[0]
+        context.onnx_input_ids["R"] = r_node.output[0]
+        context.onnx_input_ids["B"] = b_node.output[0]
+
+    def _process_weights_and_bias_keras(self, context):
+        weights = context.weights
+        W_zrh = np.expand_dims(weights["gate_kernel"].transpose(), axis=0)
+        R_zrh = np.expand_dims(weights["hidden_kernel"].transpose(), axis=0)
+        Wb_zrh = weights["gate_bias"]
+        Rb_zrh = weights["hidden_bias"]
+        B_zrh = np.expand_dims(np.concatenate((Wb_zrh, Rb_zrh), axis=0), axis=0)
+        self._make_constants(context, W_zrh, R_zrh, B_zrh)
+
     def process_weights_and_bias(self, context):
         """
         why split the data in this way should refer to code of tensorflow GRU cell and official document of ONNX GRU
         """
+        if context.from_keras:
+            self._process_weights_and_bias_keras(context)
+            return
         weights = context.weights
         # from code of tensorflow GRU cell, it can be known that shape of hidden_kernel(or candidate_kernel)
         # is (input_size+hidden_unit, hidden_unit)
@@ -157,6 +187,8 @@ def process_weights_and_bias(self, context):
         h_kernel = weights["hidden_kernel"]
         r_bias, z_bias = np.split(weights["gate_bias"], [hidden_size], axis=0)
         h_bias = weights["hidden_bias"]
+        for k in sorted(weights.keys()):
+            print(k, weights[k].shape)
         # ONNX GRU split weights of input and state, so have to split *_kernel
         input_r_kernel, state_r_kernel = np.split(r_kernel, [input_size], axis=0)
         input_z_kernel, state_z_kernel = np.split(z_kernel, [input_size], axis=0)
@@ -181,20 +213,7 @@ def process_weights_and_bias(self, context):
         B_zrh = B_zrh.astype(bias_dtype)
         assert B_zrh.shape == (1, 6*hidden_size)
         # create const ONNX node
-        w_name = utils.make_name("W")
-        w_node = self.g.make_const(w_name, W_zrh, skip_conversion=True)
-
-        r_name = utils.make_name("R")
-        r_node = self.g.make_const(r_name, R_zrh, skip_conversion=True)
-
-        b_name = utils.make_name("B")
-        b_node = self.g.make_const(b_name, B_zrh, skip_conversion=True)
-
-        context.input_size = input_size
-        context.hidden_size = hidden_size
-        context.onnx_input_ids["W"] = w_node.output[0]
-        context.onnx_input_ids["R"] = r_node.output[0]
-        context.onnx_input_ids["B"] = b_node.output[0]
+        self._make_constants(context, W_zrh, R_zrh, B_zrh)
 
     def process_var_init_nodes(self, context):
         assert "state" in context.state_variables.keys()
diff --git a/tf2onnx/rewriter/gru_tf2_rewriter.py b/tf2onnx/rewriter/gru_tf2_rewriter.py
@@ -6,7 +6,7 @@
 """
 
 from tf2onnx.graph_matcher import GraphMatcher
-from tf2onnx.rewriter.rnn_utils import make_grucell_pattern
+from tf2onnx.rewriter.rnn_utils import make_grucell_pattern, keras_gru_pattern
 from tf2onnx.tf_loader import find_function
 from tf2onnx.rewriter.unit_rnn_rewriter_base import UnitRnnContext
 from tf2onnx.rewriter.gru_rewriter import GRUUnitRewriter
@@ -17,8 +17,9 @@
 
 def rewrite_gru_tf2(g, ops):
     pattern1 = make_grucell_pattern("Identity")
+    pattern2 = keras_gru_pattern
 
-    for pattern in [pattern1]:
+    for pattern in [pattern1, pattern2]:
         matcher = GraphMatcher(pattern, allow_reorder=True)
         match_results = list(matcher.match_ops(ops))
         for match_result in match_results:
@@ -27,17 +28,21 @@ def rewrite_gru_tf2(g, ops):
             if activation_op.type not in ["Relu", "Tanh", "Sigmoid"]:
                 continue
 
-            concat = match_result.get_op("cell_inputs")
-            if len(concat.inputs) != 3:
-                continue
-            get_item = concat.inputs[0]
+            if pattern is pattern1:
+                concat = match_result.get_op("cell_inputs")
+                if len(concat.inputs) != 3:
+                    continue
+                get_item = concat.inputs[0]
+                init_state = concat.inputs[1]
+            else:
+                get_item = match_result.get_op("gru_input")
+                init_state = match_result.get_op("state")
             if not get_item.type == "TensorListGetItem":
                 continue
             x_e = get_item.inputs[0]
             if not x_e.is_graph_input():
                 continue
             x_idx = g.input_names.index(x_e.output[0])
-            init_state = concat.inputs[1]
             if not init_state.is_graph_input():
                 continue
             init_state_idx = g.input_names.index(init_state.output[0])
@@ -69,6 +74,8 @@ def has_tensor_list_consumer(n):
             out_idx = g.input_names.index(tensor_set_items[0].input[0])
 
             hk = match_result.get_op("hidden_kernel")
+            while hk.type == "Identity":
+                hk = hk.inputs[0]
             if not hk.is_graph_input():
                 continue
             hk_idx = g.input_names.index(hk.output[0])
@@ -79,6 +86,8 @@ def has_tensor_list_consumer(n):
             hb_idx = g.input_names.index(hb.output[0])
 
             gk = match_result.get_op("gate_kernel")
+            while gk.type == "Identity":
+                gk = gk.inputs[0]
             if not gk.is_graph_input():
                 continue
             gk_idx = g.input_names.index(gk.output[0])
@@ -102,6 +111,8 @@ def has_tensor_list_consumer(n):
                 "gate_bias_idx": gb_idx,
                 "seq_len_idx": seq_len_idx,
                 "activations": activations,
+                "from_keras": pattern is pattern2,
+                "linear_before_reset": 1 if pattern is pattern2 else 0,
             }
 
     for op in ops:
@@ -125,13 +136,15 @@ def has_tensor_list_consumer(n):
             initial_state = GraphBuilder(g).make_unsqueeze({"data": initial_state_sq, "axes": [0]})
 
             context = UnitRnnContext()
+            context.from_keras = body_context["from_keras"]
             context.weights.update({
                 "hidden_kernel": hk_const,
                 "hidden_bias": hb_const,
                 "gate_kernel": gk_const,
                 "gate_bias": gb_const
             })
             context.attributes["activations"] = body_context["activations"]
+            context.attributes["linear_before_reset"] = body_context["linear_before_reset"]
             tensor_array_inp = op.inputs[body_context["x_idx"]]
             if not tensor_array_inp.type == "TensorListFromTensor":
                 continue
diff --git a/tf2onnx/rewriter/rnn_utils.py b/tf2onnx/rewriter/rnn_utils.py
@@ -163,6 +163,53 @@ def make_grucell_pattern(enter_or_id="Enter"):
 
 grucell_pattern = make_grucell_pattern()
 
+def make_keras_gru_split_pattern(bias_name, kernel_name, input_name, input_op_type):
+    return OpTypePattern("Split", inputs=[
+        OpTypePattern("Const"),
+        OpTypePattern("BiasAdd", inputs=[
+            OpTypePattern("MatMul", inputs=[
+                OpTypePattern(input_op_type, name=input_name),
+                OpTypePattern("Placeholder|PlaceholderV2|Identity", name=kernel_name),
+            ], allow_reorder=False),
+            OpTypePattern("Placeholder|PlaceholderV2", name=bias_name)
+        ])
+    ])
+
+keras_gru_split0_pattern = make_keras_gru_split_pattern("gate_bias", "gate_kernel", "gru_input", "TensorListGetItem")
+keras_gru_split1_pattern = \
+    make_keras_gru_split_pattern("hidden_bias", "hidden_kernel", "state", "Placeholder|PlaceholderV2")
+
+keras_gru_sigmoid_pattern = \
+    OpTypePattern("Sigmoid", inputs=[
+        OpTypePattern("Add|AddV2", inputs=[
+            keras_gru_split0_pattern,
+            keras_gru_split1_pattern
+        ])
+    ])
+
+keras_gru_pattern = \
+    OpTypePattern("Add|AddV2", name="cell_output", inputs=[
+        OpTypePattern("Mul", inputs=[
+            keras_gru_sigmoid_pattern,
+            OpTypePattern("Placeholder|PlaceholderV2")
+        ]),
+        OpTypePattern("Mul", inputs=[
+            OpTypePattern("Sub", inputs=[
+                OpTypePattern("Const"),
+                keras_gru_sigmoid_pattern
+            ], allow_reorder=False),
+            OpTypePattern("*", name="optional_activation", inputs=[
+                OpTypePattern("Add|AddV2", inputs=[
+                    keras_gru_split0_pattern,
+                    OpTypePattern("Mul", inputs=[
+                        keras_gru_sigmoid_pattern,
+                        keras_gru_split1_pattern
+                    ])
+                ])
+            ])
+        ])
+    ])
+
 cudnn_compatible_grucell_pattern = \
     OpTypePattern("Add", name="cell_output", inputs=[
         OpTypePattern("Mul", inputs=[
diff --git a/tf2onnx/rewriter/unit_rnn_rewriter_base.py b/tf2onnx/rewriter/unit_rnn_rewriter_base.py
@@ -31,6 +31,7 @@ def __init__(self):
         self.state_variables = {}
         self.input_size = None
         self.hidden_size = None
+        self.from_keras = False
 
         self.attributes = {} # onnx attributes
         # onnx inputs: [X, W, R, B, sequence_lens, initial_h, initial_c, P],