Commit 811a3a3
Merge branch 'master' of https://github.com/onnx/tensorflow-onnx into profile
2 parents b3b08f0 + 62f1e70

File tree: 11 files changed, +127 additions, -46 deletions

README.md
Lines changed: 1 addition & 1 deletion

@@ -293,7 +293,7 @@ tf2onnx.tfonnx.process_tf_graph(tf_graph,
         onnx graph
     """
 ```
-For example in [examples/call_coverter_via_python.py]():
+For example in [examples/call_converter_via_python.py]():
 ```
 import tensorflow as tf
 import tf2onnx

tests/test_backend.py
Lines changed: 13 additions & 0 deletions

@@ -2044,6 +2044,19 @@ def func(x):
             return tf.identity(x_, name=_TFOUTPUT)
         _ = self._run_test_case(func, [_OUTPUT], {_INPUT: x_val})

+    @check_tf_min_version("2.0")
+    @check_opset_min_version(13, "quantize_and_dequantize")
+    def test_qdq_per_channel_signed_input(self):
+        x_shape = [3, 3, 2]
+        x_val = np.arange(-np.prod(x_shape)/2, np.prod(x_shape)/2).astype("float32").reshape(x_shape)
+        def func(x):
+            x_ = quantize_and_dequantize(x, np.array([-1.72, -3.89]).astype(np.float32), \
+                                         np.array([5.12, 2.36]).astype(np.float32), \
+                                         signed_input=True, narrow_range=False, \
+                                         range_given=True, axis=-1)
+            return tf.identity(x_, name=_TFOUTPUT)
+        _ = self._run_test_case(func, [_OUTPUT], {_INPUT: x_val})
+
     @skip_caffe2_backend()
     @check_opset_min_version(7, "resize_nearest_neighbor")
     def test_resize_nearest_neighbor(self):

tests/test_internals.py
Lines changed: 2 additions & 2 deletions

@@ -226,7 +226,7 @@ def test_node_attr_onnx(self):
         g = GraphUtil.create_graph_from_onnx_graph(graph_proto)
         n1 = g.get_node_by_name("n1")
         self.assertTrue("my_attr" in n1.attr)
-        self.assertTrue("my_attr" not in n1.attr_onnx)
+        self.assertTrue("my_attr" not in n1.get_onnx_attrs())

         n1 = helper.make_node("Conv", ["X", "W"], ["Y"], name="n1", domain="my_domain", my_attr="my_attr")
         graph_proto = helper.make_graph(
@@ -240,7 +240,7 @@ def test_node_attr_onnx(self):
         g = GraphUtil.create_graph_from_onnx_graph(graph_proto)
         n1 = g.get_node_by_name("n1")
         self.assertTrue("my_attr" in n1.attr)
-        self.assertTrue("my_attr" in n1.attr_onnx)
+        self.assertTrue("my_attr" in n1.get_onnx_attrs())

     def test_tensor_data(self):
         tensors = {

tf2onnx/graph.py
Lines changed: 45 additions & 14 deletions

@@ -28,6 +28,13 @@
 # todo(pengwa): remove protected-access later
 # pylint: disable=broad-except,protected-access

+class ExternalTensorStorage():
+    """Passed into graph and node methods to accumulate tensors to save externally"""
+    def __init__(self):
+        self.name_to_tensor_data = {}
+        self.name_counter = 0
+        self.external_tensor_size_threshold = 1024
+        self.node_to_modified_value_attr = {}

 class Node(object):
     """A Node - wrapper around onnx nodes that we use for graph manipulations."""
@@ -93,16 +100,40 @@ def inputs(self):
     def attr(self):
         return self._attr

-    @property
-    def attr_onnx(self):
-        """Return onnx valid attributes"""
+    def get_value_attr(self, external_tensor_storage=None):
+        """Return onnx attr for value property of node.
+        Attr is modified to point to external tensor data stored in external_tensor_storage, if included.
+        """
+        a = self._attr["value"]
+        if external_tensor_storage is not None and self in external_tensor_storage.node_to_modified_value_attr:
+            return external_tensor_storage.node_to_modified_value_attr[self]
+        if external_tensor_storage is None or a.type != AttributeProto.TENSOR:
+            return a
+        if np.product(a.t.dims) > external_tensor_storage.external_tensor_size_threshold:
+            a = copy.copy(a)
+            tensor_name = self.name + "_" + str(external_tensor_storage.name_counter)
+            external_tensor_storage.name_counter += 1
+            external_tensor_storage.name_to_tensor_data[tensor_name] = a.t.raw_data
+            external_tensor_storage.node_to_modified_value_attr[self] = a
+            a.t.raw_data = b'__EXTERNAL'
+            location = a.t.external_data.add()
+            location.key = "location"
+            location.value = tensor_name
+            a.t.data_location = TensorProto.EXTERNAL
+        return a
+
+    def get_onnx_attrs(self, external_tensor_storage=None):
+        """Return onnx valid attributes.
+        Attrs point to external tensor data stored in external_tensor_storage, if included."""
         schema = get_schema(self.type, self.graph.opset, self.domain)
         if schema is None and not (self.is_const() or self.is_graph_input()):
             logger.debug("Node %s uses non-stardard onnx op <%s, %s>, skip attribute check",
                          self.name, self.domain, self.type)
         onnx_attrs = {}
         for a in self._attr.values():
-            if schema is None or schema.has_attribute(a.name):
+            if a.name == "value":
+                onnx_attrs[a.name] = self.get_value_attr(external_tensor_storage)
+            elif schema is None or schema.has_attribute(a.name):
                 onnx_attrs[a.name] = a
         return onnx_attrs

@@ -333,7 +364,7 @@ def set_body_graph_as_attr(self, attr_name, graph):
         self.graph.contained_graphs[self.name].update({attr_name: graph})
         graph.parent_graph = self.graph

-    def update_proto(self):
+    def update_proto(self, external_tensor_storage=None):
         """Update protobuf from internal structure."""
         nodes = list(self._op.input)
         for node in nodes:
@@ -351,10 +382,10 @@ def update_proto(self):
         attr_graphs = self.get_body_graphs()
         if attr_graphs:
             for attr_name, sub_graph in attr_graphs.items():
-                graph_proto = sub_graph.make_graph("graph for " + self.name + " " + attr_name)
+                graph_proto = sub_graph.make_graph("graph for " + self.name + " " + attr_name, external_tensor_storage)
                 self.set_attr(attr_name, graph_proto)

-        attr = list(self.attr_onnx.values())
+        attr = list(self.get_onnx_attrs(external_tensor_storage).values())
         if attr:
             self._op.attribute.extend(attr)

@@ -748,10 +779,10 @@ def update_node_shape_dtype(self, node, override=False):
             self.set_shape(output, shape)
             logger.debug("Set shape of [%s] to %s", output, shape)

-    def update_proto(self):
+    def update_proto(self, external_tensor_storage=None):
         """Update the onnx protobuf from out internal Node structure."""
         for node in self._nodes:
-            node.update_proto()
+            node.update_proto(external_tensor_storage)

     def get_nodes(self):
         """Get node list."""
@@ -968,7 +999,7 @@ def _get_unvisited_child(g, node, not_visited):
         ret = [x for _, x in sorted(zip(label, ops))]
         self.reset_nodes(ret)

-    def make_graph(self, doc, graph_name=None):
+    def make_graph(self, doc, graph_name=None, external_tensor_storage=None):
         """
         Create GraphProto for onnx from internal graph.
         Args:
@@ -978,7 +1009,7 @@ def make_graph(self, doc, graph_name=None):
         graph_name = graph_name or self.graph_name
         self.delete_unused_nodes(self.outputs)
         self.topological_sort(self.get_nodes())
-        self.update_proto()
+        self.update_proto(external_tensor_storage)

         # TODO: we'd want to do something like this so that transpose optimizer is active
         # for all (unit) tests
@@ -1021,7 +1052,7 @@ def make_graph(self, doc, graph_name=None):
                 # not to use numpy_helper.from_array to create a new tensor
                 # because sometimes onnx will have a bug that only check the tensor data in specific field
                 # such as at upsample it only checks the float_data field.
-                t = op.get_attr("value")
+                t = op.get_value_attr(external_tensor_storage)
                 tensor = helper.get_attribute_value(t)
                 tensor.name = op.output[0]
                 initializers.append(tensor)
@@ -1050,14 +1081,14 @@ def make_graph(self, doc, graph_name=None):

         return graph

-    def make_model(self, graph_doc, optimize=False, graph_name="tf2onnx", **kwargs):
+    def make_model(self, graph_doc, optimize=False, graph_name="tf2onnx", external_tensor_storage=None, **kwargs):
         """
         Create final ModelProto for onnx from internal graph.
         Args:
             optimize: optimize graph via onnx
             doc: text for doc string of the model
         """
-        graph = self.make_graph(graph_doc, graph_name)
+        graph = self.make_graph(graph_doc, graph_name, external_tensor_storage)

         if "producer_name" not in kwargs:
             kwargs = {"producer_name": "tf2onnx",

tf2onnx/onnx_opset/math.py
Lines changed: 2 additions & 2 deletions

@@ -32,7 +32,7 @@ class RealDiv(common.BroadcastOp):
     pass


-@tf_op(["LeakyRelu", "LogSoftmax", "Softplus", "Softsign"])
+@tf_op(["LeakyRelu", "Softplus", "Softsign"])
 class DirectOpSinceOpset1:
     @classmethod
     def version_1(cls, ctx, node, **kwargs):
@@ -185,7 +185,7 @@ def version_8(cls, ctx, node, **kwargs):
     def version_12(cls, ctx, node, **kwargs):
         node.type = 'Clip' # clip supports all types now

-@tf_op("Softmax")
+@tf_op(["LogSoftmax", "Softmax"])
 class Softmax:
     @classmethod
     def version_1(cls, ctx, node, **kwargs):
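
Routing LogSoftmax through the Softmax handler instead of the direct 1:1 op mapping is most plausibly about axis handling (my reading; the diff gives no rationale): TF computes LogSoftmax over the last axis, while ONNX LogSoftmax before opset 13 coerces the input to 2D with a default axis of 1, so a direct mapping silently changes the result for inputs of rank greater than 2. A numpy sketch of the mismatch:

```python
import numpy as np

def log_softmax(x, axis):
    # Numerically stable log-softmax along one axis.
    x_max = x.max(axis=axis, keepdims=True)
    return x - x_max - np.log(np.exp(x - x_max).sum(axis=axis, keepdims=True))

x = np.random.rand(2, 3, 4).astype(np.float32)
tf_style = log_softmax(x, axis=-1)                  # TF default: last axis
onnx_pre13 = log_softmax(x.reshape(2, 12), axis=1)  # ONNX <13 default: coerce to 2D, axis=1
print(np.allclose(tf_style.reshape(2, 12), onnx_pre13))  # False for rank-3 input
```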

tf2onnx/optimizer/transpose_optimizer.py
Lines changed: 2 additions & 2 deletions

@@ -282,7 +282,7 @@ def _handle_nhwc_tranpose(self, trans):
             return False
         # move transpose into branches to let Transposes can be "handled" in each branch
         for n in out_nodes:
-            branch_trans = n.graph.make_node("Transpose", [trans.input[0]], attr=trans.attr_onnx)
+            branch_trans = n.graph.make_node("Transpose", [trans.input[0]], attr=trans.get_onnx_attrs())
             n.graph.replace_input(n, trans.output[0], branch_trans.output[0])

         self._g.remove_node(trans.name)
@@ -407,7 +407,7 @@ def _add_handler(self, trans, node):
             target_node.set_tensor_value(target_val)

             conv_inputs = [t_p.input[0], t_p.input[1], node.input[1]]
-            conv_node = self._g.make_node(t_p.type, conv_inputs, attr=t_p.attr_onnx)
+            conv_node = self._g.make_node(t_p.type, conv_inputs, attr=t_p.get_onnx_attrs())
             ops = self._g.get_nodes()
             self._g.replace_input(trans, trans.input[0], utils.port_name(conv_node.name), 0)
             self._g.replace_all_inputs(ops, node.output[0], trans.output[0])

tf2onnx/rewriter/quantization_ops_rewriter.py
Lines changed: 35 additions & 17 deletions

@@ -2,7 +2,7 @@
 # Licensed under the MIT license.

 """
-tf2onnx.rewriter - rewrite tensorflow QuantizeAndDequantizeV3 op
+tf2onnx.rewriter - rewrite tensorflow QuantizeAndDequantizeV2|QuantizeAndDequantizeV3 op
 """

 import numpy as np
@@ -32,47 +32,65 @@ def create_qdq_nodes(g, match_results):
         if not signed_input:
             min_quantized, max_quantized = [0, 255]

+        # Get axis attribute for per channel implementation.
+        if 'axis' in qdq_node.attr:
+            axis = qdq_node.attr['axis'].i
+
         # Get the min and max value of the inputs to QDQ op
         min_value = extract_numpy_array(qdq_node.inputs[1])
         max_value = extract_numpy_array(qdq_node.inputs[2])

-        # Calculate scales from the min and max values
-        scale_from_min_side = min_quantized/min_value if min_quantized*min_value > 0 else max_quantized
-        scale_from_max_side = max_quantized/max_value if max_quantized*max_value > 0 else max_quantized
-
-        if scale_from_min_side < scale_from_max_side:
-            scale = scale_from_min_side
-        else:
-            scale = scale_from_max_side
-
-        utils.make_sure(scale > 0, "Quantize/Dequantize scale must be greater than zero")
-
-        if signed_input:
-            zero_point = np.int8(0)
+        num_channels = min_value.shape[0]
+        scales = np.zeros(num_channels, dtype=np.float32)
+        zero_point_dtype = np.int8 if signed_input else np.uint8
+        zero_point = np.zeros(num_channels, dtype=zero_point_dtype)
+
+        for i in range(num_channels):
+            # Calculate scales from the min and max values
+            scale_from_min_side = min_quantized/min_value[i] if min_quantized*min_value[i] > 0 else max_quantized
+            scale_from_max_side = max_quantized/max_value[i] if max_quantized*max_value[i] > 0 else max_quantized
+
+            if scale_from_min_side < scale_from_max_side:
+                scale = scale_from_min_side
+            else:
+                scale = scale_from_max_side
+
+            utils.make_sure(scale > 0, "Quantize/Dequantize scale must be greater than zero")
+            scales[i] = np.float32(scale)
+
+        # Set scalars for scale and zero point for per layer quantization
+        if num_channels == 1:
+            scales = scales[0]
+            zero_point = zero_point[0]
+            attrs = {}
         else:
-            zero_point = np.uint8(0)
+            utils.make_sure(axis, "Axis must be specified for per channel quantization")
+            attrs = {'axis': axis}

         # Split it into QuantizeLinear and DequantizeLinear and remove the QDQ node reference
-        y_quant_scale = g.make_const(name=utils.make_name("y_quant_scale"), np_val=1/scale)
+        inverse_scale = (1/scales).astype(np.float32)
+        y_quant_scale = g.make_const(name=utils.make_name("y_quant_scale"), np_val=inverse_scale)
         y_zero_point = g.make_const(name=utils.make_name("y_zero_point"), np_val=zero_point)
         quant_node = g.make_node(op_type="QuantizeLinear",
                                  inputs=[qdq_node.input[0], y_quant_scale.output[0],
                                          y_zero_point.output[0]],
                                  shapes=[qdq_node_output_shape],
+                                 attr=attrs,
                                  dtypes=[qdq_node_output_dtype],
                                  name=utils.make_name("QuantLinearNode"))

         g.set_shape(quant_node.output[0], qdq_node_output_shape)

         g.remove_node(qdq_node.name)

-        y_dequant_scale = g.make_const(name=utils.make_name("y_dequant_scale"), np_val=1/scale)
+        y_dequant_scale = g.make_const(name=utils.make_name("y_dequant_scale"), np_val=inverse_scale)
         y_inv_zero_point = g.make_const(name=utils.make_name("y_inv_zero_point"), np_val=zero_point)
         dequant_node = g.make_node(op_type="DequantizeLinear",
                                    inputs=[quant_node.output[0], y_dequant_scale.output[0],
                                            y_inv_zero_point.output[0]],
                                    outputs=[qdq_node.output[0]],
                                    shapes=[qdq_node_output_shape],
+                                   attr=attrs,
                                    dtypes=[qdq_node_output_dtype],
                                    name=utils.make_name("DequantLinearNode"))
         g.set_shape(dequant_node.output[0], qdq_node_output_shape)
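
To make the per-channel arithmetic concrete, here is the scale computation from the rewriter replayed on the ranges used by the new test_qdq_per_channel_signed_input test. The signed quantized range [-128, 127] is an assumption carried over from the signed-input branch earlier in this function, which the hunk does not show:

```python
import numpy as np

min_quantized, max_quantized = -128, 127  # assumed signed, non-narrow range
min_value = np.array([-1.72, -3.89], dtype=np.float32)  # per-channel mins from the test
max_value = np.array([5.12, 2.36], dtype=np.float32)    # per-channel maxes from the test

scales = np.zeros(min_value.shape[0], dtype=np.float32)
for i in range(len(scales)):
    # Same per-element logic as the loop in the rewriter above.
    scale_from_min_side = min_quantized / min_value[i] if min_quantized * min_value[i] > 0 else max_quantized
    scale_from_max_side = max_quantized / max_value[i] if max_quantized * max_value[i] > 0 else max_quantized
    scales[i] = min(scale_from_min_side, scale_from_max_side)

print(scales)                           # approx [24.80, 32.90]: quantized units per real unit
print((1 / scales).astype(np.float32))  # the step sizes stored in y_quant_scale / y_dequant_scale
```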

tf2onnx/schemas.py
Lines changed: 1 addition & 1 deletion

@@ -136,7 +136,7 @@ def build_onnx_op(node):
             copied_sub_graph = copy.deepcopy(sub_graph)
             graph_proto = copied_sub_graph.make_graph("graph for " + node.name + " " + attr_name)
             attr.append(helper.make_attribute(attr_name, graph_proto))
-    attr.extend(node.attr_onnx.values())
+    attr.extend(node.get_onnx_attrs().values())
     if attr:
         onnx_node.attribute.extend(attr)
     return onnx_node

tf2onnx/tf_utils.py
Lines changed: 24 additions & 5 deletions

@@ -124,8 +124,24 @@ def get_tf_node_attr(node, name):
 def get_tf_version():
     return LooseVersion(tf.__version__)

-
-def tflist_to_onnx(g, shape_override):
+def compress_graph_def(graph_def):
+    """
+    Remove large const values from graph. This lets us import the graph and run shape inference without TF crashing.
+    """
+    node_defs = list(graph_def.node)
+    const_node_values = {}
+    for node_def in node_defs:
+        if node_def.op == 'Const':
+            tensor = node_def.attr["value"].tensor
+            # Small constants are sometimes used to store shape information and must be maintained
+            if len(tensor.tensor_content) > 1000:
+                make_sure(node_def.name not in const_node_values, "Two nodes in graph have same name %s", node_def.name)
+                const_node_values[node_def.name] = tensor.tensor_content
+                tensor.tensor_content = b''
+    return const_node_values
+
+
+def tflist_to_onnx(g, shape_override, const_node_values=None):
     """
     Convert the tf-node list into an onnx graph with minimal rewrites so
     we can use the onnx graph as intermediate graph.
@@ -193,7 +209,10 @@ def tflist_to_onnx(g, shape_override):
                 attr[a] = nattr.name
                 functions[nattr.name] = input_shapes
             elif a == "value":
-                onnx_tensor = tf_to_onnx_tensor(get_tf_node_attr(node, a), name=port_name(node.name))
+                tensor = get_tf_node_attr(node, a)
+                if const_node_values and node.name in const_node_values:
+                    tensor.tensor_content = const_node_values[node.name]
+                onnx_tensor = tf_to_onnx_tensor(tensor, name=port_name(node.name))
                 attr[a] = onnx_tensor
             elif a == "DstT":
                 attr["to"] = map_tf_dtype(get_tf_node_attr(node, "DstT"))
@@ -217,8 +236,8 @@ def tflist_to_onnx(g, shape_override):
     return onnx_nodes, op_cnt, attr_cnt, output_shapes, dtypes, functions


-def tensorflow_to_onnx(graph, shape_override):
+def tensorflow_to_onnx(graph, shape_override, const_node_values=None):
     """
     Load tensorflow graph and do a conversion.
     """
-    return tflist_to_onnx(graph, shape_override)
+    return tflist_to_onnx(graph, shape_override, const_node_values)
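
compress_graph_def and the new const_node_values parameter form a round trip: large Const payloads are stripped before the graph_def is imported into TF (so import and shape inference do not choke on huge constants), then re-attached when each Const's "value" attribute is converted to an ONNX tensor. A rough usage sketch (only compress_graph_def and the const_node_values parameter come from this diff; the driver code and file name are assumptions):

```python
import tensorflow as tf
from tensorflow.core.framework import graph_pb2

graph_def = graph_pb2.GraphDef()
with open("frozen_model.pb", "rb") as f:  # assumed input file
    graph_def.ParseFromString(f.read())

# Strip tensor_content from big Const nodes, keeping the bytes keyed by node name.
const_node_values = compress_graph_def(graph_def)

# Import the now-lightweight graph; TF can run shape inference safely.
with tf.Graph().as_default() as tf_graph:
    tf.import_graph_def(graph_def, name="")

# Conversion re-injects the saved bytes when it reaches each Const's "value" attr.
results = tensorflow_to_onnx(tf_graph, {}, const_node_values=const_node_values)
```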
