
Commit e3bc51c

fix operator for fakequantize
1 parent 5373991 commit e3bc51c

2 files changed: +27 -11 lines changed


tests/test_backend.py

Lines changed: 4 additions & 4 deletions
@@ -3357,7 +3357,7 @@ def func(base_matrix, diag, k):
     @check_opset_min_version(10)
     @check_tf_min_version("1.14")
     def test_fakequant_with_min_max(self):
-        x_val = np.random.random(size=[4, 5]).astype(np.float32) * 2048. - 1024.
+        x_val = np.random.random(size=[3, 3]).astype(np.float32) * 2048. - 1024.
         def func(x):
             ret = fake_quant_with_min_max_args(
                 x, min=-1024, max=1024, num_bits=8, narrow_range=False, name=None)
@@ -3366,7 +3366,7 @@ def func(x):
 
 
 if __name__ == '__main__':
-    cl = BackendTests()
-    cl.setUp()
-    cl.test_fakequant_with_min_max()
+    # cl = BackendTests()
+    # cl.setUp()
+    # cl.test_fakequant_with_min_max()
     unittest_main()
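
For reference, the fake-quant round trip this test exercises can be approximated in plain numpy as below. This is only a sketch of the usual uint8 affine scheme (TensorFlow additionally nudges min/max so that zero is exactly representable), and fake_quant_reference is a hypothetical helper, not part of the test suite.

import numpy as np

# Rough numpy approximation of fake_quant_with_min_max_args for
# min=-1024, max=1024, num_bits=8, narrow_range=False.
def fake_quant_reference(x, amin=-1024., amax=1024., num_bits=8):
    scale = (amax - amin) / (2 ** num_bits - 1)
    q = np.round((np.clip(x, amin, amax) - amin) / scale)  # quantize to [0, 255]
    return (q * scale + amin).astype(np.float32)           # dequantize back to float

x_val = np.random.random(size=[3, 3]).astype(np.float32) * 2048. - 1024.
y_ref = fake_quant_reference(x_val)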

tf2onnx/onnx_opset/quantize.py

Lines changed: 23 additions & 7 deletions
@@ -13,7 +13,7 @@
 import sys
 
 import numpy as np
-from onnx import onnx_pb
+from onnx import onnx_pb, numpy_helper, TensorProto
 from onnx.onnx_pb import TensorProto
 
 from tf2onnx import constants, utils
@@ -37,12 +37,19 @@ def version_11(cls, ctx, node, **kwargs):
         amax = node.get_attr("max").f
         narrow_range = node.get_attr("narrow_range").i
         num_bits = node.get_attr("num_bits").i
-
+
         if narrow_range:
             raise RuntimeError(
                 "Unable to convert node FakeQuantWithMinMaxArgs with "
                 "narrow_range=%r" % narrow_range)
-
+        if num_bits != 8:
+            raise RuntimeError(
+                "Unable to convert node FakeQuantWithMinMaxArgs with "
+                "num_bits=%r" % num_bits)
+
+        scale = (amax - amin) / (2 ** num_bits - 1)
+        min_adj = scale * int(amin / scale)
+        max_adj = amax + min_adj - amin
         if 0 < amin < amax:
             min_adj = 0
             max_adj = amax - amin
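
Taken on their own, the scale/min_adj/max_adj formulas introduced in this hunk give the following values for the test's [-1024, 1024] range. fakequant_params is just a hypothetical wrapper used for illustration, not a tf2onnx function.

def fakequant_params(amin, amax, num_bits=8):
    # Same formulas as the hunk above, wrapped for illustration only.
    scale = (amax - amin) / (2 ** num_bits - 1)
    min_adj = scale * int(amin / scale)  # snap amin onto the quantization grid
    max_adj = amax + min_adj - amin      # preserve the width of the range
    if 0 < amin < amax:                  # a purely positive range is anchored at zero
        min_adj = 0
        max_adj = amax - amin
    return scale, min_adj, max_adj

# For the test's range: scale ~= 8.031, min_adj ~= -1019.98, max_adj ~= 1028.02
print(fakequant_params(-1024., 1024.))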
@@ -62,18 +69,27 @@ def version_11(cls, ctx, node, **kwargs):
 
         dtype = ctx.get_dtype(node.input[0])
         shape = ctx.get_shape(node.input[0])
+        axis = 1
+        idtype = TensorProto.UINT8
+
+        pb_scale = ctx.make_const(
+            utils.make_name("{}_scaley".format(node.name)),
+            np.array(scale, dtype=np.float32))
+        zero_point = ctx.make_const(
+            utils.make_name("{}_zpy".format(node.name)),
+            np.array(min_adj, dtype=np.uint8))
 
         new_node = ctx.make_node(
-            "QuantizeLinear", [node.input[0], pb_scale, y_zero_point],
-            op_name_scope=node.name, attr={"axes": [axis]},
+            "QuantizeLinear", [node.input[0], pb_scale.name, zero_point.name],
+            op_name_scope=node.name, attr={"axis": axis},
             shapes=[shape], dtypes=[idtype])
         output_name = new_node.output[0]
-        node.input[i] = output_name
+        node.input[0] = output_name
 
         ctx.remove_node(node.name)
 
         last_node = ctx.make_node(
-            "DequantizeLinear", [new_node.output[0], x_scale, x_zero_point],
+            "DequantizeLinear", [new_node.output[0], pb_scale.name, zero_point.name],
             op_name_scope=node.name, attr={"axis": axis},
             shapes=[shape], dtypes=[dtype])
         ctx.replace_all_inputs(ctx.get_nodes(), node.output[0], last_node.output[0])
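
The net effect of the rewrite is that each FakeQuantWithMinMaxArgs node becomes a QuantizeLinear/DequantizeLinear pair sharing one scale constant and one uint8 zero-point constant. A self-contained sketch of that pattern built with onnx.helper follows; the tensor names and constant values are illustrative, not what the converter emits.

import numpy as np
from onnx import helper, numpy_helper, TensorProto

# Standalone sketch of the QuantizeLinear -> DequantizeLinear pattern that
# replaces FakeQuantWithMinMaxArgs. Names and constants are illustrative.
scale_init = numpy_helper.from_array(
    np.array(2048.0 / 255.0, dtype=np.float32), "fq_scale")
zp_init = numpy_helper.from_array(
    np.array(127, dtype=np.uint8), "fq_zero_point")

quant = helper.make_node(
    "QuantizeLinear", ["x", "fq_scale", "fq_zero_point"], ["x_q"])
dequant = helper.make_node(
    "DequantizeLinear", ["x_q", "fq_scale", "fq_zero_point"], ["y"])

graph = helper.make_graph(
    [quant, dequant], "fakequant_pattern",
    [helper.make_tensor_value_info("x", TensorProto.FLOAT, [3, 3])],
    [helper.make_tensor_value_info("y", TensorProto.FLOAT, [3, 3])],
    initializer=[scale_init, zp_init])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 11)])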
