[5593873] [ONNX] Fix ResAdd logic to support 'Conv-BN-Sigmoid-Mul-Add' as fusible patterns (#450)

gcunhase · web-flow · commit eb9e31e9fa56 · 2025-10-22T16:42:13.000Z
Signed-off-by: gcunhase <4861122+gcunhase@users.noreply.github.com>
diff --git a/modelopt/onnx/quantization/graph_utils.py b/modelopt/onnx/quantization/graph_utils.py
@@ -201,6 +201,7 @@ def _get_backbone(root: Node):
             ["BatchNormalization", "BiasAdd", conv_type],
             ["Relu", "BatchNormalization", "BiasAdd", conv_type],
             ["MaxPool", "Relu", "BatchNormalization", "BiasAdd", conv_type],
+            ["Mul", "Sigmoid", "BatchNormalization", conv_type],
         ]
     for idx, path_type in enumerate(fusible_linear_path_types):
         if has_path_type(node, graph, path_type, is_forward=False, wild_card_types=[]):
diff --git a/tests/_test_utils/onnx_quantization/lib_test_models.py b/tests/_test_utils/onnx_quantization/lib_test_models.py
@@ -555,3 +555,121 @@ def build_convtranspose_conv_residual_model():
     onnx.checker.check_model(model_inferred)
 
     return model_inferred
+
+
+def build_conv_batchnorm_sig_mul_model():
+    # Define your model inputs and outputs
+    input_names = ["input_0"]
+    output_names = ["output_0"]
+    input_shapes = [(6, 48, 64, 176)]
+    output_shapes = [(6, 48, 64, 176)]
+
+    inputs = [
+        helper.make_tensor_value_info(input_name, onnx.TensorProto.FLOAT, input_shape)
+        for input_name, input_shape in zip(input_names, input_shapes)
+    ]
+    outputs = [
+        helper.make_tensor_value_info(output_name, onnx.TensorProto.FLOAT, output_shape)
+        for output_name, output_shape in zip(output_names, output_shapes)
+    ]
+
+    # Create the ONNX graph with the nodes
+    nodes = [
+        helper.make_node(
+            op_type="Relu",
+            inputs=["input_0"],
+            outputs=["relu0_relu/Relu:0"],
+            name="relu0_relu/Relu",
+        ),
+        helper.make_node(
+            op_type="Conv",
+            inputs=["relu0_relu/Relu:0", "weights_1"],
+            outputs=["conv1_conv/Conv2D:0"],
+            name="conv1_conv/Conv2D",
+            dilations=[1, 1],
+            group=1,
+            kernel_shape=[3, 3],
+            pads=[1, 1, 1, 1],
+            strides=[1, 1],
+        ),
+        helper.make_node(
+            op_type="BatchNormalization",
+            inputs=["conv1_conv/Conv2D:0", "bn1_scale", "bn1_bias", "bn1_mean", "bn1_var"],
+            outputs=["bn1_batchnorm/BatchNormalization:0"],
+            name="bn1_batchnorm/BatchNormalization",
+        ),
+        helper.make_node(
+            op_type="Sigmoid",
+            inputs=["bn1_batchnorm/BatchNormalization:0"],
+            outputs=["sig1_sigmoid/Sigmoid:0"],
+            name="sig1_sigmoid/Sigmoid",
+        ),
+        helper.make_node(
+            op_type="Mul",
+            inputs=["sig1_sigmoid/Sigmoid:0", "bn1_batchnorm/BatchNormalization:0"],
+            outputs=["mul1_mul/Mul:0"],
+            name="mul1_mul/Mul",
+        ),
+        helper.make_node(
+            op_type="Add",
+            inputs=["relu0_relu/Relu:0", "mul1_mul/Mul:0"],
+            outputs=["add1_add/Add:0"],
+            name="add1_add/Add",
+        ),
+        helper.make_node(
+            op_type="Relu",
+            inputs=["add1_add/Add:0"],
+            outputs=["output_0"],
+            name="relu2_relu/Relu",
+        ),
+    ]
+
+    # Create the ONNX initializers
+    initializers = [
+        helper.make_tensor(
+            name="weights_1",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(48, 48, 3, 3),
+            vals=np.random.uniform(low=0.5, high=1.0, size=48 * 48 * 3 * 3),
+        ),
+        helper.make_tensor(
+            name="bn1_scale",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(48,),
+            vals=np.random.uniform(low=0.5, high=1.0, size=48),
+        ),
+        helper.make_tensor(
+            name="bn1_bias",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(48,),
+            vals=np.random.uniform(low=0.5, high=1.0, size=48),
+        ),
+        helper.make_tensor(
+            name="bn1_mean",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(48,),
+            vals=np.random.uniform(low=0.5, high=1.0, size=48),
+        ),
+        helper.make_tensor(
+            name="bn1_var",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(48,),
+            vals=np.random.uniform(low=0.5, high=1.0, size=48),
+        ),
+    ]
+
+    # Create the ONNX graph with the nodes and initializers
+    graph = helper.make_graph(
+        nodes, "conv_batchnorm_sig_mul", inputs, outputs, initializer=initializers
+    )
+
+    # Create the ONNX model
+    model = helper.make_model(graph)
+    model.opset_import[0].version = 13
+    model.ir_version = 10
+
+    # Check the ONNX model
+    model_inferred = onnx.shape_inference.infer_shapes(model)
+    onnx.checker.check_model(model_inferred)
+
+    return model_inferred
diff --git a/tests/unit/onnx/test_qdq_rules_int8.py b/tests/unit/onnx/test_qdq_rules_int8.py
@@ -19,13 +19,15 @@
 import onnx
 import onnx_graphsurgeon as gs
 from _test_utils.onnx_quantization.lib_test_models import (
+    build_conv_batchnorm_sig_mul_model,
     build_r1a_model,
     build_resnet_block,
     build_resnet_block_with_downsample,
     export_as_onnx,
 )
 
 from modelopt.onnx.quantization.quantize import quantize
+from modelopt.onnx.utils import save_onnx
 
 
 def _assert_nodes_are_quantized(nodes):
@@ -119,3 +121,32 @@ def test_resnet_residual_connection_with_downsample(tmp_path):
     onnx_path = os.path.join(tmp_path, "model.onnx")
     export_as_onnx(model_torch, input_tensor, onnx_filename=onnx_path)
     _check_resnet_residual_connection(onnx_path)
+
+
+def test_conv_batchnorm_sig_mul_int8(tmp_path):
+    onnx_model = build_conv_batchnorm_sig_mul_model()
+    onnx_path = os.path.join(tmp_path, "conv_batchnorm_sig_mul_model.onnx")
+    save_onnx(onnx_model, onnx_path)
+
+    quantize(onnx_path, quantize_mode="int8", high_precision_dtype="fp16")
+
+    # Output model should be produced in the same tmp_path
+    output_onnx_path = onnx_path.replace(".onnx", ".quant.onnx")
+
+    # Check that quantized explicit model is generated
+    assert os.path.isfile(output_onnx_path)
+
+    # Load the output model and check QDQ node placements
+    graph = gs.import_onnx(onnx.load(output_onnx_path))
+
+    # Check that all Conv-type nodes (Conv, ConvTranspose) are quantized
+    conv_nodes = [n for n in graph.nodes if "Conv" in n.op]
+    assert _assert_nodes_are_quantized(conv_nodes)
+
+    # Check that exactly one input of each Add node is quantized
+    add_nodes = [n for n in graph.nodes if n.op == "Add"]
+    for node in add_nodes:
+        quantized_inputs = [inp for inp in node.inputs if inp.inputs[0].op == "DequantizeLinear"]
+        assert len(quantized_inputs) == 1, (
+            f"More than one input of {node.name} is being quantized, but only one should be quantized!"
+        )

Original file line number	Diff line number	Diff line change
`@@ -201,6 +201,7 @@ def _get_backbone(root: Node):`
`201`	`201`	`["BatchNormalization", "BiasAdd", conv_type],`
`202`	`202`	`["Relu", "BatchNormalization", "BiasAdd", conv_type],`
`203`	`203`	`["MaxPool", "Relu", "BatchNormalization", "BiasAdd", conv_type],`
	`204`	`+ ["Mul", "Sigmoid", "BatchNormalization", conv_type],`
`204`	`205`	`]`
`205`	`206`	`for idx, path_type in enumerate(fusible_linear_path_types):`
`206`	`207`	`if has_path_type(node, graph, path_type, is_forward=False, wild_card_types=[]):`