support LSTMBlockCell in tf2onnx mapping

wayuanho · wayuanho · commit d954d4353d55 · 2019-03-14T08:43:34.000Z
diff --git a/tests/test_lstmblock.py b/tests/test_lstmblock.py
@@ -0,0 +1,180 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT license.
+
+"""Unit Tests for lstm block cell."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import numpy as np
+import tensorflow as tf
+
+from tensorflow.contrib import rnn
+from backend_test_base import Tf2OnnxBackendTestBase
+from common import unittest_main, check_tf_min_version, check_opset_min_version
+
+
+# pylint: disable=missing-docstring,invalid-name,unused-argument,using-constant-test
+
+
+class LSTMBlockTests(Tf2OnnxBackendTestBase):  # conversion tests for rnn.LSTMBlockCell
+    @check_opset_min_version(8, "Scan")
+    def test_single_dynamic_lstm(self):
+        units = 5
+        batch_size = 6
+        x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]], dtype=np.float32)
+        x_val = np.stack([x_val] * batch_size)  # shape (batch_size, 4, 2)
+
+        x = tf.placeholder(tf.float32, x_val.shape, name="input_1")
+
+        # no enclosing tf.variable_scope around the cell
+        cell = rnn.LSTMBlockCell(units, use_peephole=False)
+        outputs, cell_state = tf.nn.dynamic_rnn(
+            cell,
+            x,
+            dtype=tf.float32)
+
+        _ = tf.identity(outputs, name="output")
+        _ = tf.identity(cell_state, name="cell_state")
+
+        input_names_with_port = ["input_1:0"]
+        feed_dict = {"input_1:0": x_val}
+
+        output_names_with_port = ["output:0", "cell_state:0"]
+        self.run_test_case(feed_dict, input_names_with_port, output_names_with_port, rtol=1e-06, atol=1e-07)
+
+    # ==============================================================================================
+    # NOTE: the unittest above should be converted into a single LSTM op, while the unittests below
+    # should first be converted into a Scan op with LSTMBlockCell, then decomposed into several ops.
+    # ==============================================================================================
+
+    @check_opset_min_version(8, "Scan")
+    def test_single_dynamic_lstm_with_peephole(self):
+        units = 5
+        batch_size = 6
+        x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]], dtype=np.float32)
+        x_val = np.stack([x_val] * batch_size)  # shape (batch_size, 4, 2)
+
+        x = tf.placeholder(tf.float32, x_val.shape, name="input_1")
+
+        # no enclosing tf.variable_scope around the cell
+        cell = rnn.LSTMBlockCell(units, use_peephole=True)
+        outputs, cell_state = tf.nn.dynamic_rnn(
+            cell,
+            x,
+            dtype=tf.float32)
+
+        _ = tf.identity(outputs, name="output")
+        _ = tf.identity(cell_state, name="cell_state")
+
+        input_names_with_port = ["input_1:0"]
+        feed_dict = {"input_1:0": x_val}
+
+        output_names_with_port = ["output:0", "cell_state:0"]
+        self.run_test_case(feed_dict, input_names_with_port, output_names_with_port, rtol=1e-06, atol=1e-07)
+
+    @check_opset_min_version(8, "Scan")
+    def test_single_dynamic_lstm_with_cell_clip(self):
+        units = 5
+        batch_size = 6
+        x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]], dtype=np.float32)
+        x_val = np.stack([x_val] * batch_size)  # shape (batch_size, 4, 2)
+
+        x = tf.placeholder(tf.float32, x_val.shape, name="input_1")
+
+        # no enclosing tf.variable_scope around the cell
+        cell = rnn.LSTMBlockCell(units, cell_clip=0.05)
+        outputs, cell_state = tf.nn.dynamic_rnn(
+            cell,
+            x,
+            dtype=tf.float32)
+
+        _ = tf.identity(outputs, name="output")
+        _ = tf.identity(cell_state, name="cell_state")
+
+        input_names_with_port = ["input_1:0"]
+        feed_dict = {"input_1:0": x_val}
+
+        output_names_with_port = ["output:0", "cell_state:0"]
+        self.run_test_case(feed_dict, input_names_with_port, output_names_with_port, rtol=1e-06, atol=1e-07)
+
+    @check_opset_min_version(8, "Scan")
+    @check_tf_min_version("1.8")
+    def test_attention_wrapper_lstm_encoder(self):
+        size = 5
+        time_step = 3
+        input_size = 4
+        attn_size = size
+
+        batch_size = 9
+
+        # encoder input has shape [batch_size, time_step, input_size]
+        # attention_states: here the output of the encoder RNN built below.
+        # This tensor should be shaped `[batch_size, max_time, ...]`
+        encoder_time_step = time_step
+        encoder_x_val = np.random.randn(encoder_time_step, input_size).astype('f')  # unseeded random data
+        encoder_x_val = np.stack([encoder_x_val] * batch_size)
+        encoder_x = tf.placeholder(tf.float32, encoder_x_val.shape, name="input_1")
+        encoder_cell = rnn.LSTMBlockCell(size)
+        output, attr_state = tf.nn.dynamic_rnn(encoder_cell, encoder_x, dtype=tf.float32)
+        _ = tf.identity(output, name="output_0")
+        attention_states = output
+        attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(attn_size,
+                                                                   attention_states)
+
+        match_input_fn = lambda curr_input, state: tf.concat([curr_input, state], axis=-1)
+        cell = rnn.LSTMBlockCell(size)
+        match_cell_fw = tf.contrib.seq2seq.AttentionWrapper(cell,
+                                                            attention_mechanism,
+                                                            attention_layer_size=attn_size,
+                                                            cell_input_fn=match_input_fn,
+                                                            output_attention=False)
+
+        decoder_time_step = 6
+        decoder_x_val = np.random.randn(decoder_time_step, input_size).astype('f')  # unseeded random data
+        decoder_x_val = np.stack([decoder_x_val] * batch_size)
+
+        decoder_x = tf.placeholder(tf.float32, decoder_x_val.shape, name="input_2")
+        output, attr_state = tf.nn.dynamic_rnn(match_cell_fw, decoder_x, dtype=tf.float32)
+
+        _ = tf.identity(output, name="output")
+        _ = tf.identity(attr_state.cell_state, name="final_state")
+
+        feed_dict = {"input_1:0": encoder_x_val, "input_2:0": decoder_x_val}
+        input_names_with_port = ["input_1:0", "input_2:0"]
+        output_names_with_port = ["output_0:0", "output:0", "final_state:0"]
+        self.run_test_case(feed_dict, input_names_with_port, output_names_with_port, 0.1)  # presumably rtol=0.1
+
+    @check_opset_min_version(8, "Scan")
+    def test_multi_rnn_lstm(self):
+        units = 5
+        batch_size = 6
+        x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]], dtype=np.float32)
+        x_val = np.stack([x_val] * batch_size)  # shape (batch_size, 4, 2)
+
+        x = tf.placeholder(tf.float32, x_val.shape, name="input_1")
+
+        cell_0 = rnn.LSTMBlockCell(units)
+
+        cell_1 = rnn.LSTMBlockCell(units)
+
+        cell_2 = rnn.LSTMBlockCell(units)
+
+        cells = rnn.MultiRNNCell([cell_0, cell_1, cell_2], state_is_tuple=True)  # three stacked block cells
+        outputs, cell_state = tf.nn.dynamic_rnn(cells,
+                                                x,
+                                                dtype=tf.float32)
+
+        _ = tf.identity(outputs, name="output")
+        _ = tf.identity(cell_state, name="cell_state")
+
+        input_names_with_port = ["input_1:0"]
+        feed_dict = {"input_1:0": x_val}
+
+        output_names_with_port = ["output:0", "cell_state:0"]
+        self.run_test_case(feed_dict, input_names_with_port, output_names_with_port, rtol=1e-06, atol=1e-07)
+
+if __name__ == '__main__':  # run the tests when this module is executed as a script
+    unittest_main()
diff --git a/tf2onnx/function/__init__.py b/tf2onnx/function/__init__.py
@@ -7,9 +7,17 @@
 from __future__ import unicode_literals
 
 from .gathernd import gathernd_op
+from .lstm_block_cell import lstm_block_cell_op
 from .matrixbandpart import matrixbandpart_op
 from .range import range_op7
 from .select import select_op8
 from .sparse_softmax_cross_entropy_with_logits import sparse_softmax_cross_entropy_with_logits_op
 
-__all__ = ["gathernd_op", "matrixbandpart_op", "range_op7", "select_op8", "sparse_softmax_cross_entropy_with_logits_op"]
+__all__ = [  # names exported by `from tf2onnx.function import *`, kept in alphabetical order
+    "gathernd_op",
+    "lstm_block_cell_op",
+    "matrixbandpart_op",
+    "range_op7",
+    "select_op8",
+    "sparse_softmax_cross_entropy_with_logits_op"
+]
diff --git a/tf2onnx/function/lstm_block_cell.py b/tf2onnx/function/lstm_block_cell.py
@@ -0,0 +1,143 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT license.
+
+"""
+tf2onnx.function.lstm_block_cell - LSTMBlockCell conversion
+"""
+import numpy as np
+from tf2onnx import utils
+
+# pylint: disable=unused-argument
+
+
+def lstm_block_cell_op(ctx, node, name, args):
+    """
+    Args:
+      x: A `Tensor`. Must be one of the following types: `float32`.
+        The input to the LSTM cell, shape (batch_size, num_inputs).
+      cs_prev: A `Tensor`. Must have the same type as `x`.
+        Value of the cell state at previous time step.
+      h_prev: A `Tensor`. Must have the same type as `x`.
+        Output of the previous cell at previous time step.
+      w: A `Tensor`. Must have the same type as `x`. The weight matrix.
+      wci: A `Tensor`. Must have the same type as `x`.
+        The weight matrix for input gate peephole connection.
+      wcf: A `Tensor`. Must have the same type as `x`.
+        The weight matrix for forget gate peephole connection.
+      wco: A `Tensor`. Must have the same type as `x`.
+        The weight matrix for output gate peephole connection.
+      b: A `Tensor`. Must have the same type as `x`. The bias vector.
+      forget_bias: An optional `float`. Defaults to `1`. The forget gate bias.
+      cell_clip: An optional `float`. Defaults to `-1` (no clipping).
+        Value to clip the 'cs' value to. Disable by setting to negative value.
+      use_peephole: An optional `bool`. Defaults to `False`.
+        Whether to use peephole weights.
+      name: A name for the operation (optional).
+    Returns:
+      A tuple of `Tensor` objects (i, cs, f, o, ci, co, h).
+      i: A `Tensor`. Has the same type as `x`. The input gate.
+      cs: A `Tensor`. Has the same type as `x`. The cell state before the tanh.
+      f: A `Tensor`. Has the same type as `x`. The forget gate.
+      o: A `Tensor`. Has the same type as `x`. The output gate.
+      ci: A `Tensor`. Has the same type as `x`. The cell input.
+      co: A `Tensor`. Has the same type as `x`. The cell after the tanh.
+      h: A `Tensor`. Has the same type as `x`. The output h vector.
+    ```python
+    xh = [x, h_prev]
+    [i, ci, f, o] = xh * w + b
+    f = f + forget_bias
+    if not use_peephole:
+      wci = wcf = wco = 0
+    i = sigmoid(cs_prev .* wci + i)
+    f = sigmoid(cs_prev .* wcf + f)
+    ci = tanh(ci)
+    cs = ci .* i + cs_prev .* f
+    cs = clip(cs, cell_clip)
+    o = sigmoid(cs * wco + o)
+    co = tanh(cs)
+    h = co .* o
+    ```
+    """
+    nodes = []
+    x, cs_prev, h_prev, w, wci, wcf, wco, b = node.input
+    forget_bias = float(node.get_attr("forget_bias").f)
+    cell_clip = float(node.get_attr("cell_clip").f)
+    use_peephole = bool(node.get_attr("use_peephole").i)
+
+    def make_sigmoid(i, w, b):
+        i_w_node = ctx.make_node("Mul", [i, w])
+        i_w_b_node = ctx.make_node("Add", [i_w_node.output[0], b])
+        output_node = ctx.make_node("Sigmoid", [i_w_b_node.output[0]])
+        nodes.extend([i_w_node, i_w_b_node, output_node])
+        return output_node.output[0]
+
+    # xh = [x, h]
+    xh_node = ctx.make_node("Concat", [x, h_prev], attr={"axis": 1})
+
+    # i, ci, f, o = xh * w + b
+    xh_w_node = ctx.make_node("MatMul", [xh_node.output[0], w])
+    w_shape = ctx.get_shape(w)
+    if len(w_shape) != 2 or w_shape[1] % 4 != 0:
+        raise RuntimeError("shape of W of LSTMBlockCell {} should be times of 4".format(name))
+    merged_output_node = ctx.make_node("Add", [xh_w_node.output[0], b])
+    w_last_dim = int(w_shape[1] / 4)
+    split = [w_last_dim] * 4
+    split_output_node = ctx.make_node(
+        "Split", [merged_output_node.output[0]],
+        attr={"axis": 1, "split": split},
+        output_count=4
+    )
+    i, ci, f, o = split_output_node.output
+
+    # f = f + forget_bias
+    forget_bias_const = ctx.make_const(
+        utils.make_name("{}__forget_bias".format(name)),
+        np.array(forget_bias, dtype=np.float32)
+    )
+    f_node = ctx.make_node("Add", [f, forget_bias_const.output[0]])
+
+    if not use_peephole:
+        zeros_const = ctx.make_const(
+            utils.make_name("{}__zeros_const".format(name)),
+            np.zeros([w_last_dim], dtype=np.float32)
+        )
+        nodes.append(zeros_const)
+        wci = zeros_const.output[0]
+        wcf = zeros_const.output[0]
+        wco = zeros_const.output[0]
+
+    # i = sigmoid(cs_prev .* wci + i)
+    i = make_sigmoid(cs_prev, wci, i)
+    # f = sigmoid(cs_prev .* wcf + f)
+    f = make_sigmoid(cs_prev, wcf, f_node.output[0])
+    # ci = Tanh(ci)
+    ci_node = ctx.make_node("Tanh", [ci])
+    # cs = ci .* i + f .* cs_prev
+    ci_i_node = ctx.make_node("Mul", [ci_node.output[0], i])
+    cs_prev_f_node = ctx.make_node("Mul", [cs_prev, f])
+    cs_node = ctx.make_node("Add", [ci_i_node.output[0], cs_prev_f_node.output[0]])
+    cs = cs_node.output[0]
+    # cs = clip(cs)
+    if cell_clip > 0:
+        cs_clip_node = ctx.make_node("Clip", [cs], attr={"max": cell_clip, "min": -cell_clip})
+        nodes.append(cs_clip_node)
+        cs = cs_clip_node.output[0]
+    # o = cs * wco + o
+    o = make_sigmoid(cs, wco, o)
+    # co = Tanh(cs)
+    co_node = ctx.make_node("Tanh", [cs])
+    # h = co .* o
+    h_node = ctx.make_node("Mul", [co_node.output[0], o])
+
+    def replace_output(old_output, new_output):
+        ctx.replace_all_inputs(ctx.get_nodes(), old_output, new_output)
+        ctx.copy_dtype(old_output, new_output)
+        ctx.copy_shape(old_output, new_output)
+
+    replace_output(node.output[0], i)
+    replace_output(node.output[1], cs)
+    replace_output(node.output[2], f)
+    replace_output(node.output[3], o)
+    replace_output(node.output[4], ci_node.output[0])
+    replace_output(node.output[5], co_node.output[0])
+    replace_output(node.output[6], h_node.output[0])
diff --git a/tf2onnx/tfonnx.py b/tf2onnx/tfonnx.py
@@ -1662,6 +1662,7 @@ def where_op(ctx, node, name, args):
     "Log": (direct_op, []),
     "LogSoftmax": (direct_op, ["LogSoftmax"]),
     "LRN": (lrn_op, []),
+    "LSTMBlockCell": (lstm_block_cell_op, []),
     "LogicalAnd": (broadcast_op, ["And"]),
     "LogicalNot": (direct_op, ["Not"]),
     "LogicalOr": (broadcast_op, ["Or"]),