Commit 03a6379

Merge pull request #505 from zhijxu-MS/enhance_SparseSoftmaxCrossEntropyWithLogits
enhance SparseSoftmaxCrossEntropyWithLogits and add related test case
2 parents: c08442e + dc53507

2 files changed: +22 / -18 lines

tests/test_backend.py

Lines changed: 9 additions & 10 deletions
@@ -1935,16 +1935,15 @@ def test_softmax_cross_entropy_with_logits(self):
 
     def test_sparse_softmax_cross_entropy_with_logits(self):
         num_class = 5
-        label_val = np.array([3, 2, 0, 4]).astype(np.int32)
-        logits_val = np.random.random((len(label_val), num_class)).astype(np.float32)
-
-        label = tf.placeholder(tf.int32, shape=[None], name=_TFINPUT)
-        logits = tf.placeholder(tf.float32, shape=[None, num_class], name=_TFINPUT1)
-
-        res1 = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=logits)
-        _ = tf.identity(res1, name=_TFOUTPUT)
-
-        self._run_test_case([_OUTPUT], {_INPUT: label_val, _INPUT1: logits_val})
+        for logic_shape in [[None, None], [None, num_class]]:
+            tf.reset_default_graph()
+            label_val = np.array([3, 2, 0, 4]).astype(np.int32)
+            logits_val = np.random.random((len(label_val), num_class)).astype(np.float32)
+            label = tf.placeholder(tf.int32, shape=[None], name=_TFINPUT)
+            logits = tf.placeholder(tf.float32, shape=logic_shape, name=_TFINPUT1)
+            res1 = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=logits)
+            _ = tf.identity(res1, name=_TFOUTPUT)
+            self._run_test_case([_OUTPUT], {_INPUT: label_val, _INPUT1: logits_val})
 
     @check_target('rs6', 'SparseSoftmaxCrossEntropyWithLogits')
     def test_sparse_softmax_cross_entropy_with_logits_large_class(self):
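Note: the loop exercises both a fully dynamic logits shape ([None, None]) and a partially known one ([None, num_class]), so the converter is tested both with and without a static class count. Below is a minimal numpy sketch of the value being compared (illustrative only; helper names such as _run_test_case belong to the test harness, and the inputs mirror the test above):

import numpy as np

label_val = np.array([3, 2, 0, 4]).astype(np.int32)
logits_val = np.random.random((len(label_val), 5)).astype(np.float32)

# sparse_softmax_cross_entropy_with_logits returns, per example,
# -log_softmax(logits)[true_label]; the test checks that TensorFlow and
# the converted ONNX model agree on this value.
log_softmax = logits_val - np.log(np.exp(logits_val).sum(axis=1, keepdims=True))
loss = -log_softmax[np.arange(len(label_val)), label_val]
print(loss)  # shape (4,), one cross-entropy value per example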

tf2onnx/onnx_opset/nn.py

Lines changed: 13 additions & 8 deletions
@@ -729,20 +729,25 @@ def version_9(cls, ctx, node, **kwargs):
         logit_dtype = ctx.get_dtype(node.input[0])
 
         label_name = node.input[1]
-        label_dtype = ctx.get_dtype(label_name)
 
-        num_class = logit_shape[-1]
-        utils.make_sure(num_class != -1,
-                        "number of class should be known, otherwise subgraph to get the info is needed")
-        # int64 is used because of onnxruntime "onehot" only supports this dtype
-        depth_node = ctx.make_const(utils.make_name("onehot_depth"), np.array([num_class]).astype(np.int64))
-        values_node = ctx.make_const(utils.make_name("onehot_values"), np.array([0, 1]).astype(np.int64))
+        if logit_shape is not None and logit_shape[-1] != -1:
+            num_class = logit_shape[-1]
+            node_nme = utils.make_name("onehot_depth")
+            depth_node = ctx.make_const(node_nme, np.array([num_class]).astype(np.int64)).output[0]
+        else:
+            logit_shape = ctx.make_node("Shape", [node.input[0]]).output[0]
+            slice_args = {"data": logit_shape,
+                          "starts": [-1], "ends": [int(utils.get_max_value(np.int32))]}
+            num_class = GraphBuilder(ctx).make_slice(kwargs=slice_args)
+            depth_node = num_class
+        values_node = ctx.make_const(utils.make_name("onehot_values"), np.array([0, 1]).astype(np.int64)).output[0]
+        label_dtype = ctx.get_dtype(label_name)
         if label_dtype != TensorProto.INT64:
             onehot_indice = ctx.make_node("Cast", [label_name], attr={"to": TensorProto.INT64}).output[0]
         else:
             onehot_indice = label_name
         label_node = ctx.make_node(op_type="OneHot",
-                                   inputs=[onehot_indice, depth_node.output[0], values_node.output[0]])
+                                   inputs=[onehot_indice, depth_node, values_node])
         # the above logic makes output dtype of label_node now always int64
         # make sure label has same dtype as logit
         if logit_dtype != TensorProto.INT64:
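Note: this change removes the old requirement that the class count be known at conversion time. When the logits shape is missing or its last dimension is unknown, the converter now emits a Shape → Slice subgraph and feeds its output to OneHot as the depth input. A minimal numpy sketch of what that emitted subgraph computes at runtime (illustrative only, not tf2onnx code):

import numpy as np

# Stand-ins for the graph's runtime tensors.
logits = np.random.random((4, 5)).astype(np.float32)  # last dim = class count
labels = np.array([3, 2, 0, 4]).astype(np.int64)

# Shape -> Slice(starts=[-1], ends=[INT32_MAX]) yields the class count as a
# 1-element tensor, even when it is unknown at conversion time.
depth = np.array(logits.shape, dtype=np.int64)[-1:]   # [5]

# OneHot(labels, depth, values=[0, 1]) in int64, matching the converter's
# dtype choice (onnxruntime's OneHot only supports int64 here).
one_hot = (np.arange(depth[0]) == labels[:, None]).astype(np.int64)
print(one_hot.shape)  # (4, 5)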
