Skip to content

Commit c51a23f

Browse files
mapetrendrego
authored and committed
Add unittest for QLinearConvTranspose shape inference
1 parent 64f269a commit c51a23f

File tree

2 files changed

+42
-0
lines changed

2 files changed

+42
-0
lines changed

onnxruntime/python/tools/symbolic_shape_infer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -942,6 +942,8 @@ def _infer_qlinear_binary_op(self, node):
942942
# For qlinear binary operators the input order is
943943
# [inp_0, inp_0_scale, inp_0_zp, inp_1, inp_1_scale, inp_1_zp, out_scale, out_zp]
944944
# and we want to preserve [inp_0, inp_1]
945+
# This also applies to operators where the shape is determined by the input and weight shapes, such as
946+
# QLinearConv and QLinearConvTranspose
945947
# https://github.com/quadric-io/onnxruntime/blob/main/docs/ContribOperators.md#com.microsoft.QLinearAdd
946948
prequant_input_idx = [0, 3]
947949
self._qlinear_onnx_shape_infer(node, prequant_input_idx)

onnxruntime/test/python/quantization/test_quant_shape_inference.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,46 @@ def test_shape_qlinear_softmax(self):
325325
[1, 32],
326326
"Wrong shape inferred for quantized network output")
327327

328+
def test_shape_qlinear_conv_transpose(self):
329+
model = self.get_model(
330+
helper.make_node(
331+
"QLinearConvTranspose",
332+
inputs=[
333+
"input",
334+
"input_scale",
335+
"input_zero_point",
336+
"conv_transpose_wt_quantized",
337+
"weight_scale",
338+
"weight_zero_point",
339+
"conv_transpose_out_scale",
340+
"conv_transpose_out_zero_point",
341+
"conv_transpose_bias"
342+
],
343+
outputs=["output"],
344+
name="quant_node",
345+
domain="com.microsoft",
346+
auto_pad=b'NOTSET',
347+
dilations=[1, 1],
348+
group=1,
349+
kernel_shape=[2, 2],
350+
pads=[0, 0, 0, 0],
351+
strides=[2, 2]
352+
),
353+
[1, 32, 14, 14],
354+
[
355+
numpy_helper.from_array(np.array(0.007874015718698502, dtype="float32"), name="input_scale"),
356+
numpy_helper.from_array(np.array(0, dtype="int8"), name="input_zero_point"),
357+
numpy_helper.from_array(np.ones([32, 64, 2, 2]).astype("int8"), name="conv_transpose_wt_quantized"),
358+
numpy_helper.from_array(np.array(0.007874015718698502, dtype="float32"), name="weight_scale"),
359+
numpy_helper.from_array(np.array(0, dtype="int8"), name="weight_zero_point"),
360+
numpy_helper.from_array(np.array(0.007874015718698502, dtype="float32"), name="conv_transpose_out_scale"),
361+
numpy_helper.from_array(np.array(0, dtype="int8"), name="conv_transpose_out_zero_point"),
362+
numpy_helper.from_array(np.ones([64]).astype("int32"), name="conv_transpose_bias"),
363+
]
364+
)
365+
self.assertEqual(self.infer_out_shape(model),
366+
[1, 64, 28, 28],
367+
"Wrong shape inferred for quantized network output")
328368

329369
if __name__ == "__main__":
330370
unittest.main()

0 commit comments

Comments (0)