12 | 12 |
13 | 13 | # pylint: disable=unused-argument,missing-docstring
14 | 14 |
| 15 | + |
| 16 | +def softmax_cross_entropy_with_logits_computation(ctx, label, logit, tf_ori_node): |
| 17 | + label_dtype = ctx.get_dtype(label.output[0]) |
| 18 | + logit_dtype = ctx.get_dtype(logit.output[0]) |
| 19 | + utils.make_sure(label_dtype == logit_dtype, "the following logic only works when label and logit have the same dtype") |
| 20 | + |
| 21 | + log_softmax = ctx.make_node(op_type="LogSoftmax", inputs=logit.output) |
| 22 | + # implements tf.multiply(-1, tf.reduce_sum(tf.multiply(label, log_softmax), axis=-1)) |
| 23 | + mul1 = ctx.make_node(op_type="Mul", inputs=[label.output[0], log_softmax.output[0]]) |
| 24 | + reduce_sum = ctx.make_node(op_type="ReduceSum", inputs=[mul1.output[0]], attr={"axes": [-1]}) |
| 25 | + const_negative_one = ctx.make_const(name=utils.make_name("const_negative_one"), |
| 26 | + np_val=np.array(-1).astype(utils.ONNX_TO_NUMPY_DTYPE[logit_dtype])) |
| 27 | + mul2 = ctx.make_node(op_type="Mul", inputs=[const_negative_one.output[0], reduce_sum.output[0]]) |
| 28 | + shapes = tf_ori_node.output_shapes |
| 29 | + dtypes = tf_ori_node.output_dtypes |
| 30 | + ctx.remove_node(tf_ori_node.name) |
| 31 | + ctx.make_node(op_type="Squeeze", inputs=[mul2.output[0]], attr={"axes": [1]}, |
| 32 | + outputs=[tf_ori_node.output[0]], shapes=[shapes[0]], dtypes=[dtypes[0]]) |
| 33 | + |
| 34 | + |
| 35 | +def softmax_cross_entropy_with_logits_op(ctx, node, name, args): |
| 36 | + logits = node.inputs[0] |
| 37 | + logit_dtype = ctx.get_dtype(logits.output[0]) |
| 38 | + labels = node.inputs[1] |
| 39 | + label_dtype = ctx.get_dtype(labels.output[0]) |
| 40 | + if label_dtype != logit_dtype: |
| 41 | + labels = ctx.make_node("Cast", labels.output, attr={"to": logit_dtype}, dtypes=[logit_dtype]) |
| 42 | + |
| 43 | + softmax_cross_entropy_with_logits_computation(ctx, labels, logits, node) |
| 44 | + |
| 45 | + |
15 | 46 | def sparse_softmax_cross_entropy_with_logits_op(ctx, node, name, args):
16 | 47 | # make subgraph to implement one_hot, idea comes from onehot_op
17 | 48 | indices_name = node.input[1]
@@ -92,3 +123,32 @@ def sparse_softmax_cross_entropy_with_logits_op_by_gathernd(ctx, node, name, arg
92 | 123 | ctx.make_node(op_type="Squeeze",
93 | 124 | inputs=[mul2.output[0]], outputs=[node.output[0]],
94 | 125 | attr={"axes": [1]}, shapes=[shapes[0]], dtypes=[dtypes[0]])
| 126 | + |
| 127 | + |
| 128 | +def sparse_softmax_cross_entropy_with_logits_op9(ctx, node, name, args): |
| 129 | + # float32/64 output = SparseSoftmaxCrossEntropyWithLogits(float32/64 features, int32/64 labels) |
| 130 | + # the math of this op is: a = onehot(labels), b = logsoftmax(features), res = -reduce_sum(mul(a, b), axis=-1) |
| 131 | + logit_node = node.inputs[0] |
| 132 | + logit_shape = ctx.get_shape(node.input[0]) |
| 133 | + logit_dtype = ctx.get_dtype(node.input[0]) |
| 134 | + |
| 135 | + label_name = node.input[1] |
| 136 | + label_dtype = ctx.get_dtype(label_name) |
| 137 | + |
| 138 | + num_class = logit_shape[-1] |
| 139 | + utils.make_sure(num_class != -1, "the number of classes must be known; otherwise a subgraph to compute it is needed") |
| 140 | + # int64 is used because onnxruntime's OneHot only supports this dtype |
| 141 | + depth_node = ctx.make_const(utils.make_name("onehot_depth"), np.array([num_class]).astype(np.int64)) |
| 142 | + values_node = ctx.make_const(utils.make_name("onehot_values"), np.array([0, 1]).astype(np.int64)) |
| 143 | + if label_dtype != TensorProto.INT64: |
| 144 | + onehot_indices = ctx.make_node("Cast", [label_name], attr={"to": TensorProto.INT64}).output[0] |
| 145 | + else: |
| 146 | + onehot_indices = label_name |
| 147 | + label_node = ctx.make_node(op_type="OneHot", inputs=[onehot_indices, depth_node.output[0], values_node.output[0]]) |
| 148 | + # the logic above makes the output dtype of label_node int64 in all cases |
| 149 | + # make sure label has same dtype as logit |
| 150 | + if logit_dtype != TensorProto.INT64: |
| 151 | + label_node = ctx.make_node("Cast", label_node.output, attr={"to": logit_dtype}, dtypes=[logit_dtype]) |
| 152 | + |
| 153 | + softmax_cross_entropy_with_logits_computation(ctx, label_node, logit_node, node) |
| 154 | + |
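
For reference, the math the added subgraph encodes can be checked outside the converter. The numpy sketch below is illustrative only (not tf2onnx code, and it assumes 2-D [batch, num_classes] logits); it reproduces the onehot -> logsoftmax -> mul -> reducesum -> negate chain built above:

import numpy as np

def log_softmax(x, axis=-1):
    # subtract the row max for numerical stability, then apply log-softmax
    x = x - x.max(axis=axis, keepdims=True)
    return x - np.log(np.exp(x).sum(axis=axis, keepdims=True))

logits = np.array([[2.0, 1.0, 0.1],
                   [0.5, 2.5, 0.3]], dtype=np.float32)
labels = np.array([0, 1])                                      # sparse class ids
one_hot = np.eye(logits.shape[-1], dtype=np.float32)[labels]   # OneHot step
loss = -np.sum(one_hot * log_softmax(logits), axis=-1)         # Mul -> ReduceSum -> Mul(-1), Squeeze not needed without keepdims
print(loss)  # should match tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)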