
Commit 49a9b2d

Merge remote-tracking branch 'upstream/master' into jignparm/activate_opset12
2 parents a989a0b + f333ce5 commit 49a9b2d

File tree

7 files changed: +149 −17 lines changed

tests/test_backend.py

Lines changed: 22 additions & 0 deletions

@@ -62,6 +62,7 @@
     fused_batch_norm = tf.compat.v1.nn.fused_batch_norm
     dropout = tf.compat.v1.nn.dropout
     resize_nearest_neighbor = tf.compat.v1.image.resize_nearest_neighbor
+    quantize_and_dequantize = tf.quantization.quantize_and_dequantize
     resize_bilinear = tf.compat.v1.image.resize_bilinear
     is_nan = tf.math.is_nan
     is_inf = tf.math.is_inf
@@ -77,6 +78,7 @@
     random_uniform = tf.compat.v1.random_uniform
     fused_batch_norm = tf.compat.v1.nn.fused_batch_norm
     dropout = tf.compat.v1.nn.dropout
+    quantize_and_dequantize = tf.compat.v1.quantization.quantize_and_dequantize
     resize_nearest_neighbor = tf.compat.v1.image.resize_nearest_neighbor
     resize_bilinear = tf.compat.v1.image.resize_bilinear
     is_nan = tf.math.is_nan
@@ -1916,6 +1918,26 @@ def graph_validator(g):

         self._run_test_case(func_fusedbn, [_OUTPUT], {_INPUT: x_val}, rtol=1e-05, graph_validator=graph_validator)

+    @check_tf_min_version("1.15")
+    @check_opset_min_version(10, "quantize_and_dequantize")
+    def test_qdq_unsigned_input(self):
+        x_shape = [3, 3, 2]
+        x_val = np.arange(1, 1+np.prod(x_shape)).astype("float32").reshape(x_shape)
+        def func(x):
+            x_ = quantize_and_dequantize(x, 1.0, 6.0, signed_input=False, range_given=True)
+            return tf.identity(x_, name=_TFOUTPUT)
+        _ = self._run_test_case(func, [_OUTPUT], {_INPUT: x_val})
+
+    @check_tf_min_version("1.15")
+    @check_opset_min_version(10, "quantize_and_dequantize")
+    def test_qdq_signed_input(self):
+        x_shape = [3, 3, 2]
+        x_val = np.arange(-np.prod(x_shape)/2, np.prod(x_shape)/2).astype("float32").reshape(x_shape)
+        def func(x):
+            x_ = quantize_and_dequantize(x, -6.0, 6.0, signed_input=True, narrow_range=True, range_given=True)
+            return tf.identity(x_, name=_TFOUTPUT)
+        _ = self._run_test_case(func, [_OUTPUT], {_INPUT: x_val})
+
     @skip_caffe2_backend()
     @check_opset_min_version(7, "resize_nearest_neighbor")
     def test_resize_nearest_neighbor(self):
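A quick numpy sketch (not part of the commit) of the round trip the unsigned test exercises, assuming the zero-point-0, max-side scale rule that the new rewriter (below) derives for the range [1.0, 6.0]:

    import numpy as np

    def fake_qdq_unsigned(x, max_range=6.0):
        scale = 255.0 / max_range                  # 42.5 for this test's range
        q = np.clip(np.round(x * scale), 0, 255)   # QuantizeLinear: uint8, zero_point 0
        return q / scale                           # DequantizeLinear

    x = np.arange(1, 19, dtype=np.float32).reshape(3, 3, 2)   # the test's x_val
    y = fake_qdq_unsigned(x)   # values above 6.0 saturate to 6.0; the rest round-trip closely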

tf2onnx/graph.py

Lines changed: 3 additions & 1 deletion

@@ -1067,7 +1067,9 @@ def make_onnx_graph_io(self, ids):
             shape = self.get_shape(name)

             utils.make_sure(dtype is not None, "missing output dtype for " + name)
-            utils.make_sure(shape is not None, "missing output shape for " + name)
+            # TODO: allow None output shape or not? e.g. shape=(?,)
+            #utils.make_sure(shape is not None, "missing output shape for " + name)
+            if shape is None: logger.warning("missing output shape for %s", name)

             v = utils.make_onnx_inputs_outputs(name, dtype, shape)
             tensor_value_infos.append(v)
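With the assertion relaxed to a warning, an output whose shape is still unknown can pass through; ONNX itself permits a value_info with no shape. A minimal sketch with onnx.helper (the output name is hypothetical):

    from onnx import TensorProto, helper

    # shape=None leaves tensor_type.shape unset, which is legal ONNX
    vi = helper.make_tensor_value_info("output:0", TensorProto.FLOAT, None)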

tf2onnx/onnx_opset/tensor.py

Lines changed: 9 additions & 1 deletion

@@ -196,6 +196,7 @@ def version_1(cls, ctx, node, **kwargs):
         shape = ctx.get_shape(node.input[0])
         utils.make_sure(shape is not None, "squeeze input shape cannot be None")
         axis = [i for i, j in enumerate(shape) if j == 1]
+        if not axis: axis = [0]
         node.set_attr("axes", axis)

     @classmethod
@@ -1772,9 +1773,12 @@ class MatrixDiagPart:
     def version_11(cls, ctx, node, **kwargs):
         # MatrixDiagPart by slice and gather
         const_zero = ctx.make_const(utils.make_name(node.name) + 'const_zero', np.array([0]).astype(np.int64))
+        const_zero_ = ctx.make_const(utils.make_name(node.name) + 'const_zero_', np.array(0).astype(np.int64))
+
         const_zero_zero = ctx.make_const(utils.make_name(node.name) + 'const_zero_zero',
                                          np.array([0, 0]).astype(np.int64))
         const_one = ctx.make_const(utils.make_name(node.name) + 'const_one', np.array([1]).astype(np.int64))
+        const_one_ = ctx.make_const(utils.make_name(node.name) + 'const_one_', np.array(1).astype(np.int64))
         const_two = ctx.make_const(utils.make_name(node.name) + 'const_two', np.array([2]).astype(np.int64))
         const_negative_one = ctx.make_const(utils.make_name(node.name) + 'const_negative_one',
                                             np.array([-1]).astype(np.int64))
@@ -1802,7 +1806,9 @@ def version_11(cls, ctx, node, **kwargs):
                                                            const_negative_one.output[0]])
         sliced_input_shape_new = ctx.make_node('Concat', [sliced_input_shape_half.output[0], const_one.output[0]],
                                                attr={'axis': -1})
-        matrice_range = ctx.make_node('Range', [const_zero.output[0], min_matrice_dim.output[0], const_one.output[0]])
+        min_matrice_dim_ = ctx.make_node('Squeeze', [min_matrice_dim.output[0]], {'axes': [0]})
+        matrice_range = ctx.make_node('Range', [const_zero_.output[0], min_matrice_dim_.output[0],
+                                                const_one_.output[0]])
         unsqueezed_matrice_range = ctx.make_node('Unsqueeze', [matrice_range.output[0]], attr={"axes": [-1]})
         expanded_range = ctx.make_node('Expand', [unsqueezed_matrice_range.output[0], sliced_input_shape_new.output[0]])
         gathered_result = ctx.make_node('GatherElements', [sliced_input.output[0], expanded_range.output[0]],
@@ -1893,6 +1899,8 @@ def version_11(cls, ctx, node, **kwargs):
         new_width = body_graph.make_node('Slice', [processed_shape.output[0], const_neg_one.output[0],
                                                    shape_processed_shape.output[0]])
         abs_k = body_graph.make_node('Abs', [current_k.output[0]])
+
+
         range_k = body_graph.make_node('Range', [abs_k.output[0], new_width.output[0], const_one.output[0]],
                                        domain="com.microsoft")
         sliced_range = body_graph.make_node('Slice', [range_k.output[0], const_zero.output[0], new_depth.output[0]])
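The new scalar constants and the Squeeze exist because ONNX Range takes 0-d (scalar) start/limit/delta inputs, while the surrounding code builds 1-element 1-D tensors. A numpy analogue of the fix (the value 5 is made up):

    import numpy as np

    min_matrice_dim = np.array([5], dtype=np.int64)   # 1-D, shape (1,), as computed upstream
    start, delta = np.int64(0), np.int64(1)           # the scalar const_zero_ / const_one_
    limit = np.squeeze(min_matrice_dim, axis=0)       # the inserted Squeeze: now 0-d
    matrice_range = np.arange(start, limit, delta)    # Range -> [0 1 2 3 4]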

tf2onnx/optimizer/transpose_optimizer.py

Lines changed: 2 additions & 0 deletions

@@ -175,8 +175,10 @@ def _initialize_handlers(self):
             "Clip": self._simple_through_handler,
             "Concat": self._concat_handler,
             "Elu": self._simple_through_handler,
+            "Exp": self._simple_through_handler,
             "Identity": self._identity_handler,
             "LeakyRelu": self._simple_through_handler,
+            "Log": self._simple_through_handler,
             "Max": self._maxmin_handler,
             "Min": self._maxmin_handler,
             "Mul": self._mul_handler,

tf2onnx/rewriter/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -21,6 +21,7 @@
 from tf2onnx.rewriter.thresholded_relu_rewriter import rewrite_thresholded_relu
 from tf2onnx.rewriter.transpose_rewriter import rewrite_transpose
 from tf2onnx.rewriter.conv2d_with_add_rewriter import rewrite_biasadd_with_conv2d
+from tf2onnx.rewriter.quantization_ops_rewriter import rewrite_quantize_and_dequantize


 __all__ = [
@@ -43,4 +44,5 @@
     "rewrite_custom_rnn_cell",
     "rewrite_generic_loop",
     "rewrite_biasadd_with_conv2d",
+    "rewrite_quantize_and_dequantize"
 ]
tf2onnx/rewriter/quantization_ops_rewriter.py

Lines changed: 106 additions & 0 deletions

@@ -0,0 +1,106 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT license.
+
+"""
+tf2onnx.rewriter - rewrite tensorflow QuantizeAndDequantizeV3 op
+"""
+
+import numpy as np
+from tf2onnx.graph_matcher import OpTypePattern, GraphMatcher
+from tf2onnx import utils
+
+# pylint: disable=missing-docstring
+
+def extract_numpy_array(node):
+    return np.frombuffer(node.attr["value"].t.raw_data, dtype="float32")
+
+def create_qdq_nodes(g, match_results):
+
+    for match in match_results:
+        qdq_node = match.get_op('output')
+        qdq_node_output_dtype = g.get_dtype(qdq_node.output[0])
+        qdq_node_output_shape = g.get_shape(qdq_node.output[0])
+
+        # Get the attributes of qdq node
+        narrow_range = qdq_node.attr['narrow_range'].i
+        signed_input = qdq_node.attr['signed_input'].i
+
+        min_quantized, max_quantized = [-127, 127]
+        if not narrow_range and signed_input:
+            min_quantized = -128
+
+        if not signed_input:
+            min_quantized, max_quantized = [0, 255]
+
+        # Get the min and max value of the inputs to QDQ op
+        min_value = extract_numpy_array(qdq_node.inputs[1])
+        max_value = extract_numpy_array(qdq_node.inputs[2])
+
+        # Calculate scales from the min and max values
+        scale_from_min_side = min_quantized/min_value if min_quantized*min_value > 0 else max_quantized
+        scale_from_max_side = max_quantized/max_value if max_quantized*max_value > 0 else max_quantized
+
+        if scale_from_min_side < scale_from_max_side:
+            scale = scale_from_min_side
+        else:
+            scale = scale_from_max_side
+
+        utils.make_sure(scale > 0, "Quantize/Dequantize scale must be greater than zero")
+
+        if signed_input:
+            zero_point = np.int8(0)
+        else:
+            zero_point = np.uint8(0)
+
+        # Split it into QuantizeLinear and DequantizeLinear and remove the QDQ node reference
+        y_quant_scale = g.make_const(name=utils.make_name("y_quant_scale"), np_val=1/scale)
+        y_zero_point = g.make_const(name=utils.make_name("y_zero_point"), np_val=zero_point)
+        quant_node = g.make_node(op_type="QuantizeLinear",
+                                 inputs=[qdq_node.input[0], y_quant_scale.output[0],
+                                         y_zero_point.output[0]],
+                                 shapes=[qdq_node_output_shape],
+                                 dtypes=[qdq_node_output_dtype],
+                                 name=utils.make_name("QuantLinearNode"))
+
+        g.set_shape(quant_node.output[0], qdq_node_output_shape)
+
+        g.remove_node(qdq_node.name)
+
+        y_dequant_scale = g.make_const(name=utils.make_name("y_dequant_scale"), np_val=1/scale)
+        y_inv_zero_point = g.make_const(name=utils.make_name("y_inv_zero_point"), np_val=zero_point)
+        dequant_node = g.make_node(op_type="DequantizeLinear",
+                                   inputs=[quant_node.output[0], y_dequant_scale.output[0],
+                                           y_inv_zero_point.output[0]],
+                                   outputs=[qdq_node.output[0]],
+                                   shapes=[qdq_node_output_shape],
+                                   dtypes=[qdq_node_output_dtype],
+                                   name=utils.make_name("DequantLinearNode"))
+        g.set_shape(dequant_node.output[0], qdq_node_output_shape)
+
+    return g.get_nodes()
+
+def rewrite_quantize_and_dequantize(g, ops):
+
+    pattern_for_qdq_v2 = \
+        OpTypePattern('QuantizeAndDequantizeV2', name='output', inputs=[
+            OpTypePattern("*"),
+            OpTypePattern(None),
+            OpTypePattern(None),
+        ])
+    pattern_for_qdq_v3 = \
+        OpTypePattern('QuantizeAndDequantizeV3', name='output', inputs=[
+            OpTypePattern("*"),
+            OpTypePattern(None),
+            OpTypePattern(None),
+            OpTypePattern(None),
+        ])
+
+    # Match all the patterns for QDQ ops
+    patterns = [pattern_for_qdq_v3, pattern_for_qdq_v2]
+    match_results = []
+    for pattern in patterns:
+        matcher = GraphMatcher(pattern)
+        results = list(matcher.match_ops(ops))
+        match_results.extend(results)
+
+    return create_qdq_nodes(g, match_results)
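A hand-check (not part of the commit) of the scale this rewriter derives for the signed, narrow_range test case above, where (min_value, max_value) = (-6.0, 6.0):

    min_quantized, max_quantized = -127, 127        # narrow_range keeps -127
    scale_from_min_side = min_quantized / -6.0      # 21.1666..., since (-127) * (-6.0) > 0
    scale_from_max_side = max_quantized / 6.0       # 21.1666...
    scale = min(scale_from_min_side, scale_from_max_side)
    # QuantizeLinear gets y_scale = 1/scale ~= 0.04724 and zero_point = int8(0), so
    # x = 6.0 quantizes to round(6.0 / 0.04724) = 127 and dequantizes back to 6.0.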

tf2onnx/tfonnx.py

Lines changed: 5 additions & 15 deletions

@@ -54,21 +54,15 @@ def rewrite_constant_fold(g, ops):
         "Sqrt": np.sqrt,
         "Sub": np.subtract,
     }
-    ref_cnt_per_node = {}
-    for idx, op in enumerate(ops):
-        for op_input in op.inputs:
-            if op_input.name not in ref_cnt_per_node:
-                ref_cnt_per_node[op_input.name] = 0
-            ref_cnt_per_node[op_input.name] += 1

     # pylint: disable=too-many-nested-blocks
     keep_looking = True
     while keep_looking:
         keep_looking = False
         for idx, op in enumerate(ops):
             func = func_map.get(op.type)
-            if func is None:
-                continue
+            if func is None: continue
+            if set(op.output) & set(g.outputs): continue
             try:
                 inputs = []
                 for node in op.inputs:
@@ -109,18 +103,14 @@ def rewrite_constant_fold(g, ops):
                 old_node_name = op.name
                 logger.debug("create const node [%s] replacing [%s]", new_node_name, old_node_name)
                 ops[idx] = g.make_const(new_node_name, val)
-                ref_cnt_per_node[new_node_name] = ref_cnt_per_node[old_node_name]

                 logger.debug("replace old output [%s] with new output [%s]", old_output_name, new_output_name)
                 # need to re-write the consumers input name to use the const name
                 consumers = g.find_output_consumers(old_output_name)
                 if consumers:
                     for consumer in consumers:
                         g.replace_input(consumer, old_output_name, new_output_name)
-                for node in op.inputs:
-                    ref_cnt_per_node[node.name] -= 1
-                    if ref_cnt_per_node[node.name] == 0:
-                        g.remove_node(node.name)
+
                 # keep looking until there is nothing we can fold.
                 # We keep the graph in topological order so if we folded,
                 # the result might help a following op.
@@ -459,8 +449,8 @@ def compat_handler(ctx, node, **kwargs):

     # pre-processing graph rewrites
     # bi-directional re-writer should be placed after single directional re-writer
-    rewriters = [rewrite_transpose, rewrite_flatten, rewrite_gemm,
-                 rewrite_random_uniform, rewrite_random_uniform_fold_const,
+    rewriters = [rewrite_constant_fold, rewrite_quantize_and_dequantize, rewrite_transpose, rewrite_flatten,
+                 rewrite_gemm, rewrite_random_uniform, rewrite_random_uniform_fold_const,
                  rewrite_random_normal, rewrite_dropout, rewrite_eye,
                  rewrite_leakyrelu, rewrite_thresholded_relu, rewrite_conv2d_with_pad,
                  rewrite_single_direction_lstm, rewrite_bi_direction_lstm,
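The added `if set(op.output) & set(g.outputs): continue` guard skips folding any node that produces a graph output, presumably so the name recorded in g.outputs is not orphaned when the node is replaced by a renamed const. A trivial sketch of the set test (names hypothetical):

    op_outputs = ["Add__12:0"]
    graph_outputs = ["Add__12:0", "Mul__7:0"]
    skip_folding = bool(set(op_outputs) & set(graph_outputs))   # True -> leave the node alone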
