
Commit 62d7d8d

Merge pull request #395 from zhijxu-MS/zhijxu/push_DT_ok
Use onehot to support some ops found in real TF models and fix a bug in LSTM
2 parents 4022a52 + 4b770c3 commit 62d7d8d

File tree: 8 files changed, +162 additions, −68 deletions

tests/run_pretrained_models.py

Lines changed: 1 addition & 1 deletion
@@ -84,7 +84,7 @@ class Test(object):
     target = []

     def __init__(self, url, local, make_input, input_names, output_names,
-                 disabled=False, more_inputs=None, rtol=0.01, atol=0.,
+                 disabled=False, more_inputs=None, rtol=0.01, atol=1e-6,
                  check_only_shape=False, model_type="frozen", force_input_shape=False,
                  skip_tensorflow=False):
         self.url = url

tests/test_backend.py

Lines changed: 18 additions & 0 deletions
@@ -1613,6 +1613,24 @@ def test_shape_int64(self):
         kwargs = {"check_dtype": True}
         self._run_test_case([_OUTPUT], {_INPUT: x_val}, **kwargs)

+    # @check_opset_min_version(7, "broadcasting op")
+    @unittest.skip("disable it for now, since fold const has bug")
+    def test_softmax_cross_entropy_with_logits(self):
+        num_class = 5
+        data_shape = [100, num_class]
+        for np_dtype, tf_dtype in zip([np.int32, np.int64], [tf.int32, tf.int64]):
+            tf.reset_default_graph()
+            label_val = np.random.randint(0, num_class - 1, data_shape).astype(np_dtype)
+            logits_val = np.random.random(data_shape).astype(np.float32)
+
+            label = tf.placeholder(tf_dtype, shape=data_shape, name=_TFINPUT)
+            logits = tf.placeholder(tf.float32, shape=data_shape, name=_TFINPUT1)
+
+            res1 = tf.nn.softmax_cross_entropy_with_logits_v2(labels=label, logits=logits)
+            _ = tf.identity(res1, name=_TFOUTPUT)
+
+            self._run_test_case([_OUTPUT], {_INPUT: label_val, _INPUT1: logits_val}, atol=1e-5)
+
     def test_sparse_softmax_cross_entropy_with_logits(self):
         num_class = 5
         label_val = np.array([3, 2, 0, 4]).astype(np.int32)

tests/test_gru.py

Lines changed: 28 additions & 26 deletions
@@ -123,34 +123,36 @@ def test_single_dynamic_gru_seq_length_is_const(self):
                            graph_validator=lambda g: check_gru_count(g, 1))

     def test_single_dynamic_gru_seq_length_is_not_const(self):
-        units = 5
-        batch_size = 1
-        x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.], [5., 5.]], dtype=np.float32)
-        x_val = np.stack([x_val] * batch_size)
-        x = tf.placeholder(tf.float32, x_val.shape, name="input_1")
-        initializer = init_ops.constant_initializer(0.5)
-
-        y_val = np.array([5], dtype=np.int32)
-        seq_length = tf.placeholder(tf.int32, y_val.shape, name="input_2")
-
-        # no scope
-        cell = rnn.GRUCell(
-            units,
-            kernel_initializer=initializer)
-        outputs, cell_state = tf.nn.dynamic_rnn(
-            cell,
-            x,
-            dtype=tf.float32,
-            sequence_length=tf.identity(seq_length))
+        for np_dtype, tf_dtype in [[np.int32, tf.int32], [np.int64, tf.int64], [np.float32, tf.float32]]:
+            tf.reset_default_graph()
+            units = 5
+            batch_size = 1
+            x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.], [5., 5.]], dtype=np.float32)
+            x_val = np.stack([x_val] * batch_size)
+            x = tf.placeholder(tf.float32, x_val.shape, name="input_1")
+            initializer = init_ops.constant_initializer(0.5)
+
+            y_val = np.array([5], dtype=np_dtype)
+            seq_length = tf.placeholder(tf_dtype, y_val.shape, name="input_2")
+
+            # no scope
+            cell = rnn.GRUCell(
+                units,
+                kernel_initializer=initializer)
+            outputs, cell_state = tf.nn.dynamic_rnn(
+                cell,
+                x,
+                dtype=tf.float32,
+                sequence_length=tf.identity(seq_length))

-        _ = tf.identity(outputs, name="output")
-        _ = tf.identity(cell_state, name="cell_state")
+            _ = tf.identity(outputs, name="output")
+            _ = tf.identity(cell_state, name="cell_state")

-        feed_dict = {"input_1:0": x_val, "input_2:0": y_val}
-        input_names_with_port = ["input_1:0", "input_2:0"]
-        output_names_with_port = ["output:0", "cell_state:0"]
-        self.run_test_case(feed_dict, input_names_with_port, output_names_with_port, rtol=1e-03, atol=1e-06,
-                           graph_validator=lambda g: check_gru_count(g, 1))
+            feed_dict = {"input_1:0": x_val, "input_2:0": y_val}
+            input_names_with_port = ["input_1:0", "input_2:0"]
+            output_names_with_port = ["output:0", "cell_state:0"]
+            self.run_test_case(feed_dict, input_names_with_port, output_names_with_port, rtol=1e-03, atol=1e-06,
+                               graph_validator=lambda g: check_gru_count(g, 1))

     def test_single_dynamic_gru_placeholder_input(self):
         units = 5

tests/test_lstm.py

Lines changed: 30 additions & 28 deletions
@@ -147,36 +147,38 @@ def test_single_dynamic_lstm_seq_length_is_const(self):
                            graph_validator=lambda g: check_lstm_count(g, 1))

     def test_single_dynamic_lstm_seq_length_is_not_const(self):
-        units = 5
-        batch_size = 6
-        x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.], [5., 5.]], dtype=np.float32)
-        x_val = np.stack([x_val] * batch_size)
-        state_is_tuple = True
-        x = tf.placeholder(tf.float32, x_val.shape, name="input_1")
-        initializer = init_ops.constant_initializer(0.5)
-
-        y_val = np.array([4, 3, 4, 5, 2, 1], dtype=np.int32)
-        seq_length = tf.placeholder(tf.int32, y_val.shape, name="input_2")
-
-        # no scope
-        cell = rnn.LSTMCell(
-            units,
-            initializer=initializer,
-            state_is_tuple=state_is_tuple)
-        outputs, cell_state = tf.nn.dynamic_rnn(
-            cell,
-            x,
-            dtype=tf.float32,
-            sequence_length=tf.identity(seq_length))
+        for np_dtype, tf_dtype in [[np.int32, tf.int32], [np.int64, tf.int64], [np.float32, tf.float32]]:
+            tf.reset_default_graph()
+            units = 5
+            batch_size = 6
+            x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.], [5., 5.]], dtype=np.float32)
+            x_val = np.stack([x_val] * batch_size)
+            state_is_tuple = True
+            x = tf.placeholder(tf.float32, x_val.shape, name="input_1")
+            initializer = init_ops.constant_initializer(0.5)
+
+            y_val = np.array([4, 3, 4, 5, 2, 1], dtype=np_dtype)
+            seq_length = tf.placeholder(tf_dtype, y_val.shape, name="input_2")
+
+            # no scope
+            cell = rnn.LSTMCell(
+                units,
+                initializer=initializer,
+                state_is_tuple=state_is_tuple)
+            outputs, cell_state = tf.nn.dynamic_rnn(
+                cell,
+                x,
+                dtype=tf.float32,
+                sequence_length=tf.identity(seq_length))

-        _ = tf.identity(outputs, name="output")
-        _ = tf.identity(cell_state, name="cell_state")
+            _ = tf.identity(outputs, name="output")
+            _ = tf.identity(cell_state, name="cell_state")

-        feed_dict = {"input_1:0": x_val, "input_2:0": y_val}
-        input_names_with_port = ["input_1:0", "input_2:0"]
-        output_names_with_port = ["output:0", "cell_state:0"]
-        self.run_test_case(feed_dict, input_names_with_port, output_names_with_port, rtol=1e-06,
-                           graph_validator=lambda g: check_lstm_count(g, 1))
+            feed_dict = {"input_1:0": x_val, "input_2:0": y_val}
+            input_names_with_port = ["input_1:0", "input_2:0"]
+            output_names_with_port = ["output:0", "cell_state:0"]
+            self.run_test_case(feed_dict, input_names_with_port, output_names_with_port, rtol=1e-06,
+                               graph_validator=lambda g: check_lstm_count(g, 1))

     def test_single_dynamic_lstm_placeholder_input(self):
         units = 5

tf2onnx/function/__init__.py

Lines changed: 6 additions & 2 deletions
@@ -11,13 +11,17 @@
 from tf2onnx.function.matrixbandpart import matrixbandpart_op
 from tf2onnx.function.range import range_op7
 from tf2onnx.function.select import select_op8
-from tf2onnx.function.sparse_softmax_cross_entropy_with_logits import sparse_softmax_cross_entropy_with_logits_op
+from tf2onnx.function.softmax_cross_entropy_with_logits import softmax_cross_entropy_with_logits_op7
+from tf2onnx.function.softmax_cross_entropy_with_logits import sparse_softmax_cross_entropy_with_logits_op7
+from tf2onnx.function.softmax_cross_entropy_with_logits import sparse_softmax_cross_entropy_with_logits_op9

 __all__ = [
     "gathernd_op",
     "lstm_block_cell_op",
     "matrixbandpart_op",
     "range_op7",
     "select_op8",
-    "sparse_softmax_cross_entropy_with_logits_op"
+    "softmax_cross_entropy_with_logits_op7",
+    "sparse_softmax_cross_entropy_with_logits_op7",
+    "sparse_softmax_cross_entropy_with_logits_op9",
 ]

tf2onnx/function/sparse_softmax_cross_entropy_with_logits.py renamed to tf2onnx/function/softmax_cross_entropy_with_logits.py

Lines changed: 60 additions & 1 deletion
@@ -12,7 +12,38 @@

 # pylint: disable=unused-argument,missing-docstring

-def sparse_softmax_cross_entropy_with_logits_op(ctx, node, name, args):
+
+def _make_softmax_cross_entropy_with_logits(ctx, label, logit, tf_ori_node):
+    label_dtype = ctx.get_dtype(label.output[0])
+    logit_dtype = ctx.get_dtype(logit.output[0])
+    utils.make_sure(label_dtype == logit_dtype, "the following logic only works on same dtype of label and logit")
+
+    log_softmax = ctx.make_node(op_type="LogSoftmax", inputs=logit.output)
+    # implement tf.multiply(-1, tf.reduce_sum(tf.multiply(label, log_softmax), axis=1))
+    mul1 = ctx.make_node(op_type="Mul", inputs=[label.output[0], log_softmax.output[0]])
+    reduce_sum = ctx.make_node(op_type="ReduceSum", inputs=[mul1.output[0]], attr={"axes": [-1]})
+    const_negative_one = ctx.make_const(name=utils.make_name("const_negative_one"),
+                                        np_val=np.array(-1).astype(utils.ONNX_TO_NUMPY_DTYPE[logit_dtype]))
+    mul2 = ctx.make_node(op_type="Mul", inputs=[const_negative_one.output[0], reduce_sum.output[0]])
+    shapes = tf_ori_node.output_shapes
+    dtypes = tf_ori_node.output_dtypes
+    ctx.remove_node(tf_ori_node.name)
+    ctx.make_node(op_type="Squeeze", inputs=[mul2.output[0]], attr={"axes": [1]},
+                  outputs=[tf_ori_node.output[0]], shapes=[shapes[0]], dtypes=[dtypes[0]])
+
+
+def softmax_cross_entropy_with_logits_op7(ctx, node, name, args):
+    logits = node.inputs[0]
+    logit_dtype = ctx.get_dtype(logits.output[0])
+    labels = node.inputs[1]
+    label_dtype = ctx.get_dtype(labels.output[0])
+    if label_dtype != logit_dtype:
+        labels = ctx.make_node("Cast", labels.output, attr={"to": logit_dtype}, dtypes=[logit_dtype])
+
+    _make_softmax_cross_entropy_with_logits(ctx, labels, logits, node)
+
+
+def sparse_softmax_cross_entropy_with_logits_op7(ctx, node, name, args):
     # make subgraph to implement one_hot, idea comes from onehot_op
     indices_name = node.input[1]
     indices_shape = ctx.get_shape(indices_name)

@@ -92,3 +123,31 @@ def sparse_softmax_cross_entropy_with_logits_op_by_gathernd(ctx, node, name, arg
     ctx.make_node(op_type="Squeeze",
                   inputs=[mul2.output[0]], outputs=[node.output[0]],
                   attr={"axes": [1]}, shapes=[shapes[0]], dtypes=[dtypes[0]])
+
+
+def sparse_softmax_cross_entropy_with_logits_op9(ctx, node, name, args):
+    # float32/64 output = SparseSoftmaxCrossEntropyWithLogits(float32/64 features, int32/64 labels)
+    # the detail math process of this op is: a = onehot(labels), b = logsoftmax(features), reduce_sum(mul(a, b))
+    logit_node = node.inputs[0]
+    logit_shape = ctx.get_shape(node.input[0])
+    logit_dtype = ctx.get_dtype(node.input[0])
+
+    label_name = node.input[1]
+    label_dtype = ctx.get_dtype(label_name)
+
+    num_class = logit_shape[-1]
+    utils.make_sure(num_class != -1, "number of class should be known, otherwise subgraph to get the info is needed")
+    # int64 is used because of onnxruntime "onehot" only supports this dtype
+    depth_node = ctx.make_const(utils.make_name("onehot_depth"), np.array([num_class]).astype(np.int64))
+    values_node = ctx.make_const(utils.make_name("onehot_values"), np.array([0, 1]).astype(np.int64))
+    if label_dtype != TensorProto.INT64:
+        onehot_indice = ctx.make_node("Cast", [label_name], attr={"to": TensorProto.INT64}).output[0]
+    else:
+        onehot_indice = label_name
+    label_node = ctx.make_node(op_type="OneHot", inputs=[onehot_indice, depth_node.output[0], values_node.output[0]])
+    # the above logic makes output dtype of label_node now always int64
+    # make sure label has same dtype as logit
+    if logit_dtype != TensorProto.INT64:
+        label_node = ctx.make_node("Cast", label_node.output, attr={"to": logit_dtype}, dtypes=[logit_dtype])
+
+    _make_softmax_cross_entropy_with_logits(ctx, label_node, logit_node, node)
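For context, both new handlers reduce to the same identity: loss = -reduce_sum(onehot(labels) * log_softmax(logits), axis=-1), which is exactly what the LogSoftmax, Mul, ReduceSum and Mul(-1) nodes above assemble. A minimal NumPy sketch of that math (illustrative only; the names below are not part of the commit):

import numpy as np


def log_softmax(x, axis=-1):
    # numerically stable log-softmax, playing the role of the ONNX LogSoftmax node
    x = x - x.max(axis=axis, keepdims=True)
    return x - np.log(np.exp(x).sum(axis=axis, keepdims=True))


def sparse_softmax_cross_entropy(labels, logits):
    # OneHot node (opset-9 path): turn integer labels into a one-hot matrix
    onehot = np.eye(logits.shape[-1], dtype=logits.dtype)[labels]
    # Mul + ReduceSum + Mul(-1) nodes: -sum(onehot * log_softmax(logits)) per row
    return -np.sum(onehot * log_softmax(logits), axis=-1)


logits = np.random.random((4, 5)).astype(np.float32)
labels = np.array([3, 2, 0, 4])
print(sparse_softmax_cross_entropy(labels, logits))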

tf2onnx/rewriter/unit_rewriter_base.py

Lines changed: 16 additions & 9 deletions
@@ -215,21 +215,21 @@ def find_sequence_length_node(self, rnn_scope_name):
         seq_len_node_cnt = len(seq_len_nodes)
         if seq_len_node_cnt == 0:
             return None
+
         if seq_len_node_cnt == 1:
             seq_len_node = seq_len_nodes[0]
             if seq_len_node.is_const():
                 return seq_len_node
-            # input of the "identity" node may be a "cast"
-            # if so, then we have to keep it
-            # sentence "math_ops.to_int32(sequence_length)" in tf results in the "cast" op
-            if seq_len_node.inputs[0].type == "Cast":
-                cast_node = seq_len_node.inputs[0]
-                if not cast_node.inputs[0].name.startswith(rnn_scope_name):
-                    return seq_len_node.inputs[0]
-                raise ValueError("sequence length node should be outside of rnn scope")
+
             if not seq_len_node.inputs[0].name.startswith(rnn_scope_name):
                 return seq_len_node.inputs[0]
-            raise ValueError("sequence length node should be outside of rnn scope")
+
+            # input of the "identity" node may be a "cast" op generated by "math_ops.to_int32(sequence_length)" in tf
+            # if so, then we have to find cast input as sequence node.
+            node = seq_len_node.inputs[0]
+            if node.type == "Cast" and not node.inputs[0].name.startswith(rnn_scope_name):
+                return node.inputs[0]
+
         raise ValueError("there are more sequence length nodes than expected")

     def get_rnn_input_blacklist(self, rnn_weights, rnn_props):

@@ -342,6 +342,13 @@ def process_seq_length(self, rnn_props, seq_length_node):
                                                attr={"to": onnx_pb.TensorProto.INT32})

             self.all_nodes.extend([timestep_node, tile_node, seq_length_node])
+        else:
+            # LSTM sequence_lens needs to be int32
+            ori_seq_dtype = self.g.get_dtype(seq_length_node.name)
+            if ori_seq_dtype != onnx_pb.TensorProto.INT32:
+                seq_length_node = self.g.make_node('Cast', [seq_length_node.output[0]],
+                                                   attr={"to": onnx_pb.TensorProto.INT32})
+                self.all_nodes.append(seq_length_node)

         rnn_props.onnx_input_ids["sequence_lens"] = seq_length_node.output[0]
         return seq_length_node, batchsize_node
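The else branch above is the LSTM fix mentioned in the commit title: ONNX LSTM/GRU/RNN require the sequence_lens input to be int32, so a Cast is inserted whenever the original sequence-length tensor has a different dtype (for example the int64 placeholders exercised by the new tests). A minimal standalone sketch using the onnx helper API, not the rewriter's own graph-builder calls:

from onnx import helper, TensorProto

# cast the sequence-length tensor to the int32 dtype ONNX expects for sequence_lens
cast_node = helper.make_node(
    "Cast",
    inputs=["seq_length"],
    outputs=["seq_length_int32"],
    to=TensorProto.INT32,
)

# the casted tensor is then wired into the LSTM node's fifth input (sequence_lens)
lstm_node = helper.make_node(
    "LSTM",
    inputs=["X", "W", "R", "B", "seq_length_int32"],
    outputs=["Y", "Y_h", "Y_c"],
    hidden_size=5,
)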

tf2onnx/tfonnx.py

Lines changed: 3 additions & 1 deletion
@@ -1800,7 +1800,6 @@ def where_op(ctx, node, name, args):
     "ExpandDims": (expanddims_op7, []),
     "OneHot": (onehot_op, []),
     "Reshape": (reshape_op5, []),
-    "SparseSoftmaxCrossEntropyWithLogits": (sparse_softmax_cross_entropy_with_logits_op, [])
 }

 _OPSET_6 = {

@@ -1841,6 +1840,8 @@ def where_op(ctx, node, name, args):
     "ResizeNearestNeighbor": (upsample_op7, ["Upsample", "nearest"]),
     "Sin": (direct_op, []),
     "Sub": (broadcast_op7, []),
+    "SoftmaxCrossEntropyWithLogits": (softmax_cross_entropy_with_logits_op7, []),
+    "SparseSoftmaxCrossEntropyWithLogits": (sparse_softmax_cross_entropy_with_logits_op7, []),
     "Tan": (direct_op, []),
     "Tile": (tile_op7, []),
     "TruncateDiv": (broadcast_op7, ["Div"]),

@@ -1870,6 +1871,7 @@ def where_op(ctx, node, name, args):
     "ReverseSequence": (reverse_op9, []),
     "Sign": (sign_op9, []),
     "Sinh": (direct_op, []),
+    "SparseSoftmaxCrossEntropyWithLogits": (sparse_softmax_cross_entropy_with_logits_op9, []),
     "Where": (where_op, []),
 }
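These tables register SparseSoftmaxCrossEntropyWithLogits under both opset 7 and opset 9: the opset-9 handler can rely on the ONNX OneHot op (introduced in opset 9), while the opset-7 handler builds the one-hot subgraph by hand. As a rough illustration of the versioned-table idea (hypothetical names, not tf2onnx's actual dispatch code), tables like these can be merged so the newest handler available at the target opset wins:

# hypothetical handler tables, for illustration only
_OPSET_7_TABLE = {"SparseSoftmaxCrossEntropyWithLogits": "sparse_softmax_cross_entropy_with_logits_op7"}
_OPSET_9_TABLE = {"SparseSoftmaxCrossEntropyWithLogits": "sparse_softmax_cross_entropy_with_logits_op9"}


def resolve_handlers(target_opset):
    # merge in ascending opset order so higher-opset entries override lower ones
    merged = {}
    for opset, table in [(7, _OPSET_7_TABLE), (9, _OPSET_9_TABLE)]:
        if opset <= target_opset:
            merged.update(table)
    return merged


print(resolve_handlers(8))   # falls back to the opset-7 handler
print(resolve_handlers(10))  # picks the OneHot-based opset-9 handler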
