Commit 68e6abb

review feedback, merge master
2 parents ccc6dfd + 11c9246 commit 68e6abb

File tree

11 files changed: +204 -35 lines changed

LICENSE

Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 MIT License

-Copyright (c) Microsoft Corporation
+Copyright (c) ONNX Project Contributors
 All rights reserved.

 Permission is hereby granted, free of charge, to any person obtaining a copy

tests/test_backend.py

Lines changed: 2 additions & 2 deletions

@@ -1912,14 +1912,14 @@ def test_ceil(self):
     def test_softplus(self):
         x_val = np.array([-1, 0, 1], dtype=np.float32)
         x = tf.placeholder(tf.float32, [3], name=_TFINPUT)
-        x_ = tf.math.softplus(x)
+        x_ = tf.nn.softplus(x)
         _ = tf.identity(x_, name=_TFOUTPUT)
         self._run_test_case([_OUTPUT], {_INPUT: x_val})

     def test_softsign(self):
         x_val = np.array([-1, 0, 1], dtype=np.float32)
         x = tf.placeholder(tf.float32, [3], name=_TFINPUT)
-        x_ = tf.math.softsign(x)
+        x_ = tf.nn.softsign(x)
         _ = tf.identity(x_, name=_TFOUTPUT)
         self._run_test_case([_OUTPUT], {_INPUT: x_val})

tests/test_optimizers.py

Lines changed: 64 additions & 1 deletion

@@ -14,7 +14,6 @@
 from backend_test_base import Tf2OnnxBackendTestBase
 from common import unittest_main, group_nodes_by_type

-
 # pylint: disable=missing-docstring,invalid-name,unused-argument,using-constant-test

 class OptimizerTests(Tf2OnnxBackendTestBase):
@@ -423,5 +422,69 @@ def test_duplicated_need_multiple_run(self):
                              op_type="Log", remaining_op_num=3)
     # Merge Duplicated Nodes Optimizer Tests End

+    # Const Fold Optimizer Tests Start
+
+    def test_const_fold_trans_with_const1(self):
+        shape = (6, 6)
+        const_tensor = helper.make_tensor(name='const_tensor', data_type=TensorProto.FLOAT, dims=shape,
+                                          vals=np.random.randn(*shape).flatten().astype(np.float32))
+        node1 = helper.make_node("Constant", [], ["const"], value=const_tensor)
+        node2 = helper.make_node("Transpose", ["const"], ["value1"])
+        node3 = helper.make_node("Add", ["value1", "X"], ["res"])
+
+        graph = helper.make_graph(
+            [node1, node2, node3],
+            "test_const_fold_trans_with_const1",
+            [helper.make_tensor_value_info("X", TensorProto.FLOAT, shape)],
+            [helper.make_tensor_value_info("res", TensorProto.FLOAT, shape)],
+        )
+
+        model_proto = helper.make_model(graph, producer_name="onnx-tests")
+        self.run_transpose_compare(["res"], {"X": np.random.randn(*shape).astype(np.float32)},
+                                   model_proto, remaining_transpose_num=0)
+
+    def test_const_fold_trans_with_const2(self):
+        # needs multiple optimization runs
+        shape = (6, 6)
+        const_tensor = helper.make_tensor(name='const_tensor', data_type=TensorProto.FLOAT, dims=shape,
+                                          vals=np.random.randn(*shape).flatten().astype(np.float32))
+        node1 = helper.make_node("Constant", [], ["const"], value=const_tensor)
+        node2 = helper.make_node("Transpose", ["const"], ["value1"])
+        node3 = helper.make_node("Transpose", ["value1"], ["value2"])
+        node4 = helper.make_node("Add", ["value2", "X"], ["res"])
+
+        graph = helper.make_graph(
+            [node1, node2, node3, node4],
+            "test_const_fold_trans_with_const2",
+            [helper.make_tensor_value_info("X", TensorProto.FLOAT, shape)],
+            [helper.make_tensor_value_info("res", TensorProto.FLOAT, shape)],
+        )
+
+        model_proto = helper.make_model(graph, producer_name="onnx-tests")
+        self.run_transpose_compare(["res"], {"X": np.random.randn(*shape).astype(np.float32)},
+                                   model_proto, remaining_transpose_num=0)
+
+    def test_const_fold_node_is_output(self):
+        # needs multiple optimization runs
+        shape = (6, 6)
+        const_tensor = helper.make_tensor(name='const_tensor', data_type=TensorProto.FLOAT, dims=shape,
+                                          vals=np.random.randn(*shape).flatten().astype(np.float32))
+        node1 = helper.make_node("Constant", [], ["const"], value=const_tensor)
+        node2 = helper.make_node("Transpose", ["const"], ["value1"])
+        node3 = helper.make_node("Transpose", ["value1"], ["res"])
+
+        graph = helper.make_graph(
+            [node1, node2, node3],
+            "test_const_fold_node_is_output",
+            [],
+            [helper.make_tensor_value_info("res", TensorProto.FLOAT, shape)],
+        )
+
+        model_proto = helper.make_model(graph, producer_name="onnx-tests")
+        self.run_transpose_compare(["res"], {},
+                                   model_proto, remaining_transpose_num=0)
+    # Const Fold Optimizer Tests End
+
+

 if __name__ == "__main__":
     unittest_main()

tf2onnx/onnx_opset/tensor.py

Lines changed: 2 additions & 1 deletion

@@ -133,15 +133,16 @@ def version_4(cls, ctx, node, **kwargs):
         else:
             del node.attr["axis"]

-        shape = ctx.get_shape(node.input[0])
         if axis and axis.ints:
             axis = axis.ints
             neg_axis = any([val < 0 for val in axis])
             if neg_axis:
+                shape = ctx.get_shape(node.input[0])
                 utils.make_sure(shape is not None, "squeeze input shape cannot be None")
                 shape_len = len(shape)
                 axis = [a + shape_len if a < 0 else a for a in axis]
         else:
+            shape = ctx.get_shape(node.input[0])
             utils.make_sure(shape is not None, "squeeze input shape cannot be None")
             axis = [i for i, j in enumerate(shape) if j == 1]
         node.set_attr("axes", axis)
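
Note: the hunk above narrows the ctx.get_shape lookup to the two branches that actually consult the shape; the axis handling itself is plain Python. A minimal standalone sketch of the two normalizations, with made-up values (not from the commit):

# branch 1: explicit axes may be negative; shift them by the input rank
shape = [1, 3, 1, 5]   # hypothetical input shape
axis = [-2, 0]         # hypothetical squeeze axes
shape_len = len(shape)
axis = [a + shape_len if a < 0 else a for a in axis]
print(axis)            # [2, 0]

# branch 2: no axes given, so squeeze every dimension of size 1
axis = [i for i, j in enumerate(shape) if j == 1]
print(axis)            # [0, 2]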

tf2onnx/optimizer/__init__.py

Lines changed: 3 additions & 0 deletions

@@ -10,15 +10,18 @@
 import traceback
 from collections import OrderedDict

+from tf2onnx.optimizer.const_fold_optimizer import ConstFoldOptimizer
 from tf2onnx.optimizer.identity_optimizer import IdentityOptimizer
 from tf2onnx.optimizer.merge_duplicated_nodes_optimizer import MergeDuplicatedNodesOptimizer
 from tf2onnx.optimizer.transpose_optimizer import TransposeOptimizer

+
 # pylint: disable=missing-docstring, broad-except

 # the optimizer sequence needs to be considered carefully
 _optimizers = OrderedDict([
     ("transpose_opt", TransposeOptimizer),
+    ("fold_const", ConstFoldOptimizer),
     # merge_duplicated_nodes should be used after transpose_opt,
     # because transpose_opt may leave transpose nodes that can be merged
     ("merge_duplicated_nodes", MergeDuplicatedNodesOptimizer),
tf2onnx/optimizer/const_fold_optimizer.py (new file)

Lines changed: 101 additions & 0 deletions

@@ -0,0 +1,101 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT license.
+
+"""const fold Optimizer.
+   if all of an op's inputs are const, do the computation while building the graph to improve performance;
+   for example, if the input of a Transpose node is const, we can transpose it statically instead of at runtime
+"""
+
+from tf2onnx.optimizer.optimizer_base import GraphOptimizerBase
+from tf2onnx import utils
+
+# pylint: disable=logging-not-lazy,unused-argument,missing-docstring
+
+# key is op_type, value is the function to compute outputs;
+# the schema of the function is: inputs are (node, graph), output is a list of constant values
+_func_map = {}
+
+
+def _register_func(op_type):
+    def _internal_fun(func):
+        _func_map[op_type] = func
+        return func
+    return _internal_fun
+
+
+class ConstFoldOptimizer(GraphOptimizerBase):
+
+    def __init__(self, debug=False):
+        super(ConstFoldOptimizer, self).__init__("ConstFoldOptimizer", debug)
+
+    def _optimize(self, graph):
+        return self._apply_optimization(graph, self._optimize_at_current_graph_level)
+
+    def _optimize_at_current_graph_level(self, graph):
+        graph_changed = True
+        while graph_changed:
+            graph_changed = False
+            ops = graph.get_nodes()
+            for op in ops:
+                if self._should_skip(op):
+                    continue
+                if self._fold_node(op, graph):
+                    graph_changed = True
+        return graph
+
+    @staticmethod
+    def _should_skip(node):
+        # only official onnx ops are supported for now; ops in other domains are not
+        if not utils.is_onnx_domain(node.domain):
+            return True
+
+        if node.is_const() or node.is_graph_input():
+            return True
+
+        skip_type = ["Identity"]
+        if node.type in skip_type:
+            return True
+
+        return False
+
+    def _fold_node(self, node, graph):
+        """ if all of a node's inputs are const and it is not a graph output, it can be folded.
+            if the node can be folded, True is returned, indicating that the graph has changed.
+        """
+        if self._all_inputs_are_const(node.inputs) and not self._is_graph_output(node, graph):
+            process_func = _func_map.get(node.type, None)
+            if process_func:
+                const_outputs = process_func(node, graph)
+                self._replace_node_with_const(node, graph, const_outputs)
+                return True
+            self.log.debug("need to add function to fold op %s whose op_type is %s", node.name, node.type)
+        return False
+
+    @staticmethod
+    def _all_inputs_are_const(nodes):
+        return all(node.is_const() for node in nodes if node)
+
+    @staticmethod
+    def _is_graph_output(node, graph):
+        node_out_set = set(node.output)
+        graph_out_set = set(graph.outputs)
+        return node_out_set.intersection(graph_out_set)
+
+    @staticmethod
+    def _replace_node_with_const(node, graph, vals):
+        utils.make_sure(len(node.output) == len(vals), "length of node outputs and const vals should be same")
+        for old_input, val in zip(node.output, vals):
+            const_node = graph.make_const(utils.make_name("const_fold_opt"), val)
+            graph.set_dtype(const_node.output[0], utils.map_numpy_to_onnx_dtype(val.dtype))
+            graph.set_shape(const_node.output[0], val.shape)
+            graph.replace_all_inputs(graph.get_nodes(), old_input, const_node.output[0])
+        graph.remove_node(node.name)
+
+    @staticmethod
+    @_register_func("Transpose")
+    def _fold_transpose(node, graph) -> list:
+        const_val = node.inputs[0].get_tensor_value(as_list=False)
+        perm_attr = node.get_attr("perm")
+        perm = perm_attr.ints if perm_attr else None
+        const_val_after_trans = const_val.transpose(perm)
+        return [const_val_after_trans]
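
The _func_map registry makes the pass easy to extend: a fold function only has to compute the node's outputs as numpy arrays. As a hypothetical extension (not part of this commit), folding ONNX Neg could be registered at module level next to _func_map, reusing only the node APIs visible above:

# hypothetical extension: fold a Neg node whose input is const
@_register_func("Neg")
def _fold_neg(node, graph):
    const_val = node.inputs[0].get_tensor_value(as_list=False)  # numpy array of the const input
    return [-const_val]  # negate once at conversion time; one array per node output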

tf2onnx/optimizer/identity_optimizer.py

Lines changed: 9 additions & 16 deletions

@@ -6,14 +6,9 @@
 """

 from __future__ import unicode_literals
-import logging

 from tf2onnx.optimizer.optimizer_base import GraphOptimizerBase

-
-log = logging.getLogger("tf2onnx.optimizer.identity_optimizer")
-
-
 # pylint: disable=logging-not-lazy,unused-argument,missing-docstring,unused-variable,arguments-differ


@@ -22,7 +17,6 @@ class IdentityOptimizer(GraphOptimizerBase):

     def __init__(self, debug=False):
         super(IdentityOptimizer, self).__init__("IdentityOptimizer", debug)
-
         self._g = None

     def optimize(self, graph):
@@ -31,8 +25,8 @@ def optimize(self, graph):
         self._optimize_recursively(self._g)
         current_counter = self._g.dump_node_statistics()
         identity_cnt = current_counter["Identity"]
-        current_counter.subtract(previous_counter)
-        log.info(" %d identity op(s) left, ops diff after identity optimization: %s", identity_cnt, current_counter)
+        self.log.info(" %d identity op(s) left", identity_cnt)
+        self._print_stat_diff(previous_counter, current_counter)
         return self._g

     def _optimize_recursively(self, g):
@@ -42,9 +36,9 @@ def _optimize_recursively(self, g):
             body_graphs = n.get_body_graphs()
             if body_graphs:
                 for attr, b_g in body_graphs.items():
-                    log.debug("start handling subgraph of %s's attribute %s", n.name, attr)
+                    self.log.debug("start handling subgraph of %s's attribute %s", n.name, attr)
                     self._optimize_recursively(b_g)
-                    log.debug("finish handling subgraph of %s's attribute %s", n.name, attr)
+                    self.log.debug("finish handling subgraph of %s's attribute %s", n.name, attr)

     def _optimize(self, g):
         has_update = True
@@ -53,7 +47,7 @@ def _optimize(self, g):
             nodes = [n for n in g.get_nodes() if n.type == "Identity"]
             for n in nodes:
                 if n.graph is None:
-                    log.info("node has been removed from this graph, skip")
+                    self.log.info("node has been removed from this graph, skip")
                     continue

                 graph_outputs = set(n.output).intersection(g.outputs)
@@ -72,19 +66,18 @@ def _handle_non_graph_output_identity(graph, identity):
         graph.remove_node(identity.name)
         return True

-    @staticmethod
-    def _handle_graph_output_identity(graph, identity, graph_outputs):
+    def _handle_graph_output_identity(self, graph, identity, graph_outputs):
         input_id = identity.input[0]
         input_node = identity.inputs[0]

         if input_node.graph != graph:
             # If input node is in parent graph, we don't handle it now
-            log.debug("input node in parent graph, skip")
+            self.log.debug("input node in parent graph, skip")
             return False

         if input_node.is_graph_input():
             # Identity between input and output should not be removed.
-            log.debug("skip identity between input and output")
+            self.log.debug("skip identity between input and output")
             return False

         output_id = identity.output[0]
@@ -93,7 +86,7 @@ def _handle_graph_output_identity(graph, identity, graph_outputs):
         if input_id in graph.outputs:
             # input_id is already a graph output, so we cannot make it another graph output.
             # this Identity must be kept.
-            log.debug("identity input already be graph output")
+            self.log.debug("identity input already be graph output")
             return False

         graph.remove_node(identity.name)

tf2onnx/optimizer/merge_duplicated_nodes_optimizer.py

Lines changed: 0 additions & 2 deletions

@@ -8,7 +8,6 @@
 """

 from collections import defaultdict, namedtuple
-import logging

 from tf2onnx.optimizer.optimizer_base import GraphOptimizerBase

@@ -23,7 +22,6 @@ class MergeDuplicatedNodesOptimizer(GraphOptimizerBase):
     """
     def __init__(self, debug=False):
         super(MergeDuplicatedNodesOptimizer, self).__init__("MergeDuplicatedNodesOptimizer", debug)
-        self._log = logging.getLogger("tf2onnx.optimizer.%s" % self._name)
         # used internally
         self._graph_can_be_optimized = True

tf2onnx/optimizer/optimizer_base.py

Lines changed: 8 additions & 1 deletion

@@ -4,6 +4,7 @@
 """Graph Optimizer Base"""

 from __future__ import unicode_literals
+import logging


 class GraphOptimizerBase(object):
@@ -13,10 +14,12 @@ class GraphOptimizerBase(object):
     def __init__(self, name, debug=False):
         self._debug = debug
         self._name = name
+        self._log = logging.getLogger("tf2onnx.optimizer.%s" % self._name)

     def optimize(self, graph):
         original_node_statistics = graph.dump_node_statistics()
         graph = self._optimize(graph)
+        graph.delete_unused_nodes(graph.outputs)
         node_statistics = graph.dump_node_statistics()
         self._print_stat_diff(original_node_statistics, node_statistics)
         return graph
@@ -28,6 +31,10 @@ def _optimize(self, graph):
     def name(self):
         return self._name

+    @property
+    def log(self):
+        return self._log
+
     @staticmethod
     def _apply_optimization(graph, optimize_func):
         """
@@ -52,4 +59,4 @@ def _print_stat_diff(self, nodes_original, nodes_after_optimized):
         for key, value in nodes_after_optimized.items():
             if value != 0:
                 res[key] = value
-        self._log.info("after optimized, the optimization_statistics is %s", res)
+        self.log.info("the optimization gain is %s", res)
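
For context, dump_node_statistics evidently returns a collections.Counter (the old identity-optimizer code above called .subtract() on it), so the "optimization gain" log boils down to a counter diff filtered to nonzero entries. A standalone sketch with hypothetical numbers:

from collections import Counter

nodes_original = Counter({"Transpose": 7, "Identity": 5, "Add": 3})
nodes_after_optimized = Counter({"Transpose": 1, "Identity": 0, "Add": 3})

diff = nodes_after_optimized.copy()
diff.subtract(nodes_original)   # per-op delta; negative means ops were removed

res = {key: value for key, value in diff.items() if value != 0}
print(res)                      # {'Transpose': -6, 'Identity': -5}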
