Merge pull request #919 from peri044/qdq_rewriter

guschmue · web-flow · commit 68d7b88bc2ac · 2020-05-15T16:22:40.000-07:00
Support for QuantizeAndDequantize operation
diff --git a/tests/test_backend.py b/tests/test_backend.py
@@ -62,6 +62,7 @@
     fused_batch_norm = tf.compat.v1.nn.fused_batch_norm
     dropout = tf.compat.v1.nn.dropout
     resize_nearest_neighbor = tf.compat.v1.image.resize_nearest_neighbor
+    quantize_and_dequantize = tf.quantization.quantize_and_dequantize
     resize_bilinear = tf.compat.v1.image.resize_bilinear
     is_nan = tf.math.is_nan
     is_inf = tf.math.is_inf
@@ -77,6 +78,7 @@
     random_uniform = tf.compat.v1.random_uniform
     fused_batch_norm = tf.compat.v1.nn.fused_batch_norm
     dropout = tf.compat.v1.nn.dropout
+    quantize_and_dequantize = tf.compat.v1.quantization.quantize_and_dequantize
     resize_nearest_neighbor = tf.compat.v1.image.resize_nearest_neighbor
     resize_bilinear = tf.compat.v1.image.resize_bilinear
     is_nan = tf.math.is_nan
@@ -1916,6 +1918,26 @@ def graph_validator(g):
 
         self._run_test_case(func_fusedbn, [_OUTPUT], {_INPUT: x_val}, rtol=1e-05, graph_validator=graph_validator)
 
+    @check_tf_min_version("1.15")
+    @check_opset_min_version(10, "quantize_and_dequantize")
+    def test_qdq_unsigned_input(self):  # quantize_and_dequantize with signed_input=False, given range [1, 6]
+        x_shape = [3, 3, 2]
+        x_val = np.arange(1, 1+np.prod(x_shape)).astype("float32").reshape(x_shape)  # 1..18, all positive
+        def func(x):
+            x_ = quantize_and_dequantize(x, 1.0, 6.0, signed_input=False, range_given=True)
+            return tf.identity(x_, name=_TFOUTPUT)
+        _ = self._run_test_case(func, [_OUTPUT], {_INPUT: x_val})
+
+    @check_tf_min_version("1.15")
+    @check_opset_min_version(10, "quantize_and_dequantize")
+    def test_qdq_signed_input(self):  # signed_input=True with narrow_range=True, given range [-6, 6]
+        x_shape = [3, 3, 2]  # 18 values; x_val below runs -9..8, so it straddles zero
+        x_val = np.arange(-np.prod(x_shape)/2, np.prod(x_shape)/2).astype("float32").reshape(x_shape)
+        def func(x):
+            x_ = quantize_and_dequantize(x, -6.0, 6.0, signed_input=True, narrow_range=True, range_given=True)
+            return tf.identity(x_, name=_TFOUTPUT)
+        _ = self._run_test_case(func, [_OUTPUT], {_INPUT: x_val})
+
     @skip_caffe2_backend()
     @check_opset_min_version(7, "resize_nearest_neighbor")
     def test_resize_nearest_neighbor(self):
diff --git a/tf2onnx/rewriter/__init__.py b/tf2onnx/rewriter/__init__.py
@@ -21,6 +21,7 @@
 from tf2onnx.rewriter.thresholded_relu_rewriter import rewrite_thresholded_relu
 from tf2onnx.rewriter.transpose_rewriter import rewrite_transpose
 from tf2onnx.rewriter.conv2d_with_add_rewriter import rewrite_biasadd_with_conv2d
+from tf2onnx.rewriter.quantization_ops_rewriter import rewrite_quantize_and_dequantize
 
 
 __all__ = [
@@ -43,4 +44,5 @@
     "rewrite_custom_rnn_cell",
     "rewrite_generic_loop",
     "rewrite_biasadd_with_conv2d",
+    "rewrite_quantize_and_dequantize"
 ]
diff --git a/tf2onnx/rewriter/quantization_ops_rewriter.py b/tf2onnx/rewriter/quantization_ops_rewriter.py
@@ -0,0 +1,106 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT license.
+
+"""
+tf2onnx.rewriter - rewrite tensorflow QuantizeAndDequantizeV2 and QuantizeAndDequantizeV3 ops
+"""
+
+import numpy as np
+from tf2onnx.graph_matcher import OpTypePattern, GraphMatcher
+from tf2onnx import utils
+
+# pylint: disable=missing-docstring
+
+def extract_numpy_array(node):  # view the raw_data bytes of the node's "value" tensor attribute as float32
+    return np.frombuffer(node.attr["value"].t.raw_data, dtype="float32")  # 1-D result; dtype is hard-coded
+
+def create_qdq_nodes(g, match_results):
+    # Replace every matched QDQ node in g with a QuantizeLinear -> DequantizeLinear pair; returns g.get_nodes().
+    for match in match_results:
+        qdq_node = match.get_op('output')
+        qdq_node_output_dtype = g.get_dtype(qdq_node.output[0])
+        qdq_node_output_shape = g.get_shape(qdq_node.output[0])
+
+        # Read the narrow_range / signed_input flags that select the quantized integer limits below
+        narrow_range = qdq_node.attr['narrow_range'].i
+        signed_input = qdq_node.attr['signed_input'].i
+
+        min_quantized, max_quantized = [-127, 127]  # NOTE(review): 8-bit limits are hard-coded; num_bits is not read
+        if not narrow_range and signed_input:
+            min_quantized = -128
+
+        if not signed_input:
+            min_quantized, max_quantized = [0, 255]
+
+        # Get the min and max value of the inputs to QDQ op (inputs 1 and 2, read as float32 constants)
+        min_value = extract_numpy_array(qdq_node.inputs[1])
+        max_value = extract_numpy_array(qdq_node.inputs[2])
+
+        # Candidate scale per side; max_quantized is the fallback when that side's product is not > 0
+        scale_from_min_side = min_quantized/min_value if min_quantized*min_value > 0 else max_quantized
+        scale_from_max_side = max_quantized/max_value if max_quantized*max_value > 0 else max_quantized
+
+        if scale_from_min_side < scale_from_max_side:  # keep the smaller of the two candidates
+            scale = scale_from_min_side
+        else:
+            scale = scale_from_max_side
+
+        utils.make_sure(scale > 0, "Quantize/Dequantize scale must be greater than zero")
+
+        if signed_input:  # zero point is always 0; only its integer type (int8 vs uint8) differs
+            zero_point = np.int8(0)
+        else:
+            zero_point = np.uint8(0)
+
+        # Split it into QuantizeLinear and DequantizeLinear and remove the QDQ node reference
+        y_quant_scale = g.make_const(name=utils.make_name("y_quant_scale"), np_val=1/scale)
+        y_zero_point = g.make_const(name=utils.make_name("y_zero_point"), np_val=zero_point)
+        quant_node = g.make_node(op_type="QuantizeLinear",
+                                 inputs=[qdq_node.input[0], y_quant_scale.output[0],
+                                         y_zero_point.output[0]],
+                                 shapes=[qdq_node_output_shape],
+                                 dtypes=[qdq_node_output_dtype],  # NOTE(review): QDQ output dtype reused here - confirm
+                                 name=utils.make_name("QuantLinearNode"))
+
+        g.set_shape(quant_node.output[0], qdq_node_output_shape)
+
+        g.remove_node(qdq_node.name)  # removed before the DequantizeLinear node below reuses its output name
+
+        y_dequant_scale = g.make_const(name=utils.make_name("y_dequant_scale"), np_val=1/scale)
+        y_inv_zero_point = g.make_const(name=utils.make_name("y_inv_zero_point"), np_val=zero_point)
+        dequant_node = g.make_node(op_type="DequantizeLinear",
+                                   inputs=[quant_node.output[0], y_dequant_scale.output[0],
+                                           y_inv_zero_point.output[0]],
+                                   outputs=[qdq_node.output[0]],
+                                   shapes=[qdq_node_output_shape],
+                                   dtypes=[qdq_node_output_dtype],
+                                   name=utils.make_name("DequantLinearNode"))
+        g.set_shape(dequant_node.output[0], qdq_node_output_shape)
+
+    return g.get_nodes()
+
+def rewrite_quantize_and_dequantize(g, ops):
+    # Match QuantizeAndDequantizeV2/V3 nodes in ops and pass them to create_qdq_nodes; returns its result.
+    pattern_for_qdq_v2 = \
+        OpTypePattern('QuantizeAndDequantizeV2', name='output', inputs=[
+            OpTypePattern("*"),
+            OpTypePattern(None),
+            OpTypePattern(None),
+        ])
+    pattern_for_qdq_v3 = \
+        OpTypePattern('QuantizeAndDequantizeV3', name='output', inputs=[
+            OpTypePattern("*"),
+            OpTypePattern(None),
+            OpTypePattern(None),
+            OpTypePattern(None),
+        ])
+
+    # Run each pattern over ops and pool the matches (V3 results first, then V2)
+    patterns = [pattern_for_qdq_v3, pattern_for_qdq_v2]
+    match_results = []
+    for pattern in patterns:
+        matcher = GraphMatcher(pattern)
+        results = list(matcher.match_ops(ops))
+        match_results.extend(results)
+
+    return create_qdq_nodes(g, match_results)
diff --git a/tf2onnx/tfonnx.py b/tf2onnx/tfonnx.py
@@ -459,7 +459,7 @@ def compat_handler(ctx, node, **kwargs):
 
     # pre-processing graph rewrites
     # bi-directional re-writer should be placed after single directional re-writer
-    rewriters = [rewrite_transpose, rewrite_flatten, rewrite_gemm,
+    rewriters = [rewrite_quantize_and_dequantize, rewrite_transpose, rewrite_flatten, rewrite_gemm,
                  rewrite_random_uniform, rewrite_random_uniform_fold_const,
                  rewrite_random_normal, rewrite_dropout, rewrite_eye,
                  rewrite_leakyrelu, rewrite_thresholded_relu, rewrite_conv2d_with_pad,