Added unit test for INT8 quantization of a Conv -> BatchNorm -> Relu -> MaxPool -> Conv model

gcunhase · gcunhase · commit 47e38922b4a6 · 2025-10-21T09:57:06.000-04:00
Signed-off-by: gcunhase <4861122+gcunhase@users.noreply.github.com>
diff --git a/tests/_test_utils/onnx_quantization/lib_test_models.py b/tests/_test_utils/onnx_quantization/lib_test_models.py
@@ -555,3 +555,128 @@ def build_convtranspose_conv_residual_model():
     onnx.checker.check_model(model_inferred)
 
     return model_inferred
+
+
+def build_conv_act_pool_model():
+    """Build a Conv -> BatchNormalization -> Relu -> MaxPool -> Conv ONNX test model.
+
+    Weights come from `np.random.uniform`; no seed is set here, so values differ per call.
+    Returns the model after shape inference and checker validation (opset 13, IR version 10).
+    """
+    input_names = ["input_0"]
+    output_names = ["output_0"]
+    input_shapes = [(32, 64, 256, 256)]
+    # No padding: 256 -> Conv 3x3 -> 254 -> MaxPool 3x3/stride 2 -> 126 -> Conv 3x3 -> 124
+    output_shapes = [(32, 128, 124, 124)]
+
+    inputs = [
+        helper.make_tensor_value_info(input_name, onnx.TensorProto.FLOAT, input_shape)
+        for input_name, input_shape in zip(input_names, input_shapes)
+    ]
+    outputs = [
+        helper.make_tensor_value_info(output_name, onnx.TensorProto.FLOAT, output_shape)
+        for output_name, output_shape in zip(output_names, output_shapes)
+    ]
+
+    nodes = [
+        helper.make_node(
+            op_type="Conv",
+            inputs=["input_0", "weights_1", "bias_1"],
+            outputs=["conv1_conv/Conv2D:0"],
+            name="conv1_conv/Conv2D",
+            dilations=[1, 1],
+            group=1,
+            kernel_shape=[3, 3],
+            pads=[0, 0, 0, 0],
+            strides=[1, 1],
+        ),
+        helper.make_node(
+            op_type="BatchNormalization",
+            inputs=["conv1_conv/Conv2D:0", "bn1_scale", "bn1_bias", "bn1_mean", "bn1_var"],
+            outputs=["bn1_batchnorm/BatchNormalization:0"],
+            name="bn1_batchnorm/BatchNormalization",
+        ),
+        helper.make_node(
+            op_type="Relu",
+            inputs=["bn1_batchnorm/BatchNormalization:0"],
+            outputs=["relu1_relu/Relu:0"],
+            name="relu1_relu/Relu",
+        ),
+        helper.make_node(
+            op_type="MaxPool",
+            inputs=["relu1_relu/Relu:0"],
+            outputs=["maxpool1_maxpool/MaxPool2D:0"],
+            name="maxpool1_maxpool/MaxPool2D",
+            ceil_mode=False,
+            kernel_shape=[3, 3],
+            pads=[0, 0, 0, 0],
+            strides=[2, 2],
+        ),
+        helper.make_node(
+            op_type="Conv",
+            inputs=["maxpool1_maxpool/MaxPool2D:0", "weights_2"],
+            outputs=["output_0"],
+            name="conv2_conv/Conv2D",
+            dilations=[1, 1],
+            group=1,
+            kernel_shape=[3, 3],
+            pads=[0, 0, 0, 0],
+            strides=[1, 1],
+        ),
+    ]
+
+    initializers = [
+        helper.make_tensor(
+            name="weights_1",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(128, 64, 3, 3),
+            vals=np.random.uniform(low=0.5, high=1.0, size=128 * 64 * 3 * 3),
+        ),
+        helper.make_tensor(
+            name="bias_1",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(128,),
+            vals=np.random.uniform(low=0.5, high=1.0, size=128),
+        ),
+        helper.make_tensor(
+            name="bn1_scale",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(128,),
+            vals=np.random.uniform(low=0.5, high=1.0, size=128),
+        ),
+        helper.make_tensor(
+            name="bn1_bias",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(128,),
+            vals=np.random.uniform(low=0.5, high=1.0, size=128),
+        ),
+        helper.make_tensor(
+            name="bn1_mean",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(128,),
+            vals=np.random.uniform(low=0.5, high=1.0, size=128),
+        ),
+        helper.make_tensor(
+            name="bn1_var",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(128,),
+            vals=np.random.uniform(low=0.5, high=1.0, size=128),
+        ),
+        helper.make_tensor(
+            name="weights_2",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(128, 128, 3, 3),
+            vals=np.random.uniform(low=0.5, high=1.0, size=128 * 128 * 3 * 3),
+        ),
+    ]
+
+    graph = helper.make_graph(nodes, "conv_act_pool", inputs, outputs, initializer=initializers)
+
+    model = helper.make_model(graph)
+    model.opset_import[0].version = 13
+    model.ir_version = 10
+
+    model_inferred = onnx.shape_inference.infer_shapes(model)
+    onnx.checker.check_model(model_inferred)
+
+    return model_inferred
diff --git a/tests/unit/onnx/test_quantize_int8.py b/tests/unit/onnx/test_quantize_int8.py
@@ -21,6 +21,7 @@
 import torch
 from _test_utils.onnx_quantization.lib_test_models import (
     SimpleMLP,
+    build_conv_act_pool_model,
     build_convtranspose_conv_residual_model,
     export_as_onnx,
 )
@@ -29,13 +30,18 @@
 from modelopt.onnx.utils import save_onnx
 
 
-def _assert_nodes_are_quantized(nodes):
+def _assert_nodes_quantization(nodes, should_be_quantized=True):
+    """Assert Variable inputs of `nodes` are (or are not) fed by DequantizeLinear; return True."""
+    state = "not quantized but should be" if should_be_quantized else "quantized but should not be"
     for node in nodes:
-        for inp_idx, inp in enumerate(node.inputs):
-            if isinstance(inp, gs.Variable):
-                assert node.i(inp_idx).op == "DequantizeLinear", (
-                    f"Input '{inp.name}' of node '{node.name}' is not quantized but should be!"
-                )
+        for inp in node.inputs:
+            if not isinstance(inp, gs.Variable):
+                continue
+            # A tensor without a producer (e.g. a graph input) is treated as not quantized.
+            is_quantized = any(producer.op == "DequantizeLinear" for producer in inp.inputs)
+            assert is_quantized == should_be_quantized, (
+                f"Input '{inp.name}' of node '{node.name}' is {state}!"
+            )
     return True
 
 
@@ -59,7 +65,7 @@ def test_int8(tmp_path, high_precision_dtype):
 
     # Check that all MatMul nodes are quantized
     mm_nodes = [n for n in graph.nodes if n.op == "MatMul"]
-    assert _assert_nodes_are_quantized(mm_nodes)
+    assert _assert_nodes_quantization(mm_nodes)
 
 
 def test_convtranspose_conv_residual_int8(tmp_path):
@@ -80,7 +86,7 @@ def test_convtranspose_conv_residual_int8(tmp_path):
 
     # Check that Conv and ConvTransposed are quantized
     conv_nodes = [n for n in graph.nodes if "Conv" in n.op]
-    assert _assert_nodes_are_quantized(conv_nodes)
+    assert _assert_nodes_quantization(conv_nodes)
 
     # Check that only 1 input of Add is quantized
     add_nodes = [n for n in graph.nodes if n.op == "Add"]
@@ -89,3 +95,28 @@ def test_convtranspose_conv_residual_int8(tmp_path):
         assert len(quantized_inputs) == 1, (
             f"More than one input of {node.name} is being quantized, but only one should be quantized!"
         )
+
+
+def test_conv_act_pool_int8(tmp_path):
+    """INT8-quantize Conv-BN-Relu-MaxPool-Conv: Conv inputs must be DQ-fed, MaxPool inputs not."""
+    onnx_path = os.path.join(tmp_path, "conv_act_pool_model.onnx")
+    save_onnx(build_conv_act_pool_model(), onnx_path)
+
+    moq.quantize(onnx_path, quantize_mode="int8", high_precision_dtype="fp16")
+
+    # The quantized model is expected next to the input, with a ".quant.onnx" suffix
+    output_onnx_path = onnx_path.replace(".onnx", ".quant.onnx")
+    assert os.path.isfile(output_onnx_path)
+
+    # Load the output model and check QDQ node placements
+    graph = gs.import_onnx(onnx.load(output_onnx_path))
+
+    # Check that Conv is quantized (guard against a vacuous pass on an empty node list)
+    conv_nodes = [n for n in graph.nodes if n.op == "Conv"]
+    assert conv_nodes, "No Conv nodes found in the quantized model!"
+    assert _assert_nodes_quantization(conv_nodes)
+
+    # Check that MaxPool is not quantized (same guard: an empty list would prove nothing)
+    pool_nodes = [n for n in graph.nodes if n.op == "MaxPool"]
+    assert pool_nodes, "No MaxPool nodes found in the quantized model!"
+    assert _assert_nodes_quantization(pool_nodes, should_be_quantized=False)