[5274346] Skip copy ops in CASK patterns, added unittest

gcunhase · gcunhase · commit b442a288a15e · 2025-10-17T12:22:34.000-04:00
Signed-off-by: gcunhase <4861122+gcunhase@users.noreply.github.com>
diff --git a/modelopt/onnx/op_types.py b/modelopt/onnx/op_types.py
@@ -96,9 +96,9 @@ def is_fusible_scaling_op(op_type: str):
     ]
 
 
-def is_copy_op(op_type: str):
-    """Returns whether the given op is a copy operator or not."""
-    return op_type in [
+def copy_ops():
+    """Returns list of copy operators."""
+    return [
         "Flatten",
         "Transpose",
         "Concat",
@@ -118,6 +118,11 @@ def is_copy_op(op_type: str):
     ]
 
 
+def is_copy_op(op_type: str):
+    """Returns whether the given op is a copy operator or not."""
+    return op_type in copy_ops()
+
+
 def is_linear_op(op_type: str):
     """Returns whether the given op type is of Linear category or not."""
     return op_type in ["Conv", "ConvTranspose", "Gemm", "MatMul"]
diff --git a/modelopt/onnx/quantization/graph_utils.py b/modelopt/onnx/quantization/graph_utils.py
@@ -30,7 +30,7 @@
 from onnxruntime.tools.symbolic_shape_infer import SymbolicShapeInference
 
 from modelopt.onnx.logging_config import logger
-from modelopt.onnx.op_types import is_copy_op, is_linear_op
+from modelopt.onnx.op_types import copy_ops, is_copy_op, is_linear_op
 from modelopt.onnx.quantization.ort_utils import create_inference_session
 from modelopt.onnx.utils import (
     find_lowest_common_ancestor,
@@ -203,7 +203,7 @@ def _get_backbone(root: Node):
             ["MaxPool", "Relu", "BatchNormalization", "BiasAdd", conv_type],
         ]
     for idx, path_type in enumerate(fusible_linear_path_types):
-        if has_path_type(node, graph, path_type, is_forward=False, wild_card_types=[]):
+        if has_path_type(node, graph, path_type, is_forward=False, wild_card_types=copy_ops()):
             return _get_backbone(node)
 
     return None
diff --git a/modelopt/onnx/quantization/partitioning.py b/modelopt/onnx/quantization/partitioning.py
@@ -44,7 +44,7 @@ def _build_fusible_partition(
     """Traverses the graph starting from cur_node and updates the fusible_partition list.
 
     Add a nodes to the partition if any of these holds:
-    1. The node is a unary or binary pointwise operation and fusible by cask
+    1. The node is a unary or binary pointwise operation or a copy op and fusible by cask
     2. The node is BN and/or Relu and fusible with preceding Conv op (Conv-Act fusion)
     3. The node is MaxPool following a Conv-Act pattern (Conv-Act-Pool fusion)
     4. The node is a residual Add and fusible with current partition
@@ -132,6 +132,10 @@ def _is_fusible_mul(mul_node: Node) -> bool:
 
         if (
             (
+                is_copy_op(consumer_node.op)
+                and _is_cask_fusible(consumer_node, partition_node_outputs)
+            )
+            or (
                 is_pointwise_or_elementwise_op(consumer_node.op)
                 and _is_cask_fusible(consumer_node, partition_node_outputs)
             )
diff --git a/tests/_test_utils/onnx_quantization/lib_test_models.py b/tests/_test_utils/onnx_quantization/lib_test_models.py
@@ -556,7 +556,7 @@ def build_convtranspose_conv_residual_model():
     return model_inferred
 
 
-def build_conv_act_pool_model():
+def build_conv_act_pool_model(include_reshape_node=False):
     # Define your model inputs and outputs
     input_names = ["input_0"]
     output_names = ["output_0"]
@@ -582,7 +582,7 @@ def build_conv_act_pool_model():
             dilations=[1, 1],
             group=1,
             kernel_shape=[3, 3],
-            pads=[0, 0, 0, 0],
+            pads=[1, 1, 1, 1],
             strides=[1, 1],
         ),
         helper.make_node(
@@ -597,28 +597,43 @@ def build_conv_act_pool_model():
             outputs=["relu1_relu/Relu:0"],
             name="relu1_relu/Relu",
         ),
-        helper.make_node(
-            op_type="MaxPool",
-            inputs=["relu1_relu/Relu:0"],
-            outputs=["maxpool1_maxpool/MaxPool2D:0"],
-            name="maxpool1_maxpool/MaxPool2D",
-            ceil_mode=False,
-            kernel_shape=[3, 3],
-            pads=[0, 0, 0, 0],
-            strides=[2, 2],
-        ),
-        helper.make_node(
-            op_type="Conv",
-            inputs=["maxpool1_maxpool/MaxPool2D:0", "weights_2"],
-            outputs=["output_0"],
-            name="conv2_conv/Conv2D",
-            dilations=[1, 1],
-            group=1,
-            kernel_shape=[3, 3],
-            pads=[0, 0, 0, 0],
-            strides=[1, 1],
-        ),
     ]
+    if include_reshape_node:
+        nodes.append(
+            helper.make_node(
+                op_type="Reshape",
+                inputs=["relu1_relu/Relu:0", "shape_1"],
+                outputs=["reshape1_reshape/Reshape:0"],
+                name="reshape1_reshape/Reshape",
+            ),
+        )
+    nodes.extend(
+        [
+            helper.make_node(
+                op_type="MaxPool",
+                inputs=[
+                    "reshape1_reshape/Reshape:0" if include_reshape_node else "relu1_relu/Relu:0"
+                ],
+                outputs=["maxpool1_maxpool/MaxPool2D:0"],
+                name="maxpool1_maxpool/MaxPool2D",
+                ceil_mode=False,
+                kernel_shape=[3, 3],
+                pads=[1, 1, 1, 1],
+                strides=[2, 2],
+            ),
+            helper.make_node(
+                op_type="Conv",
+                inputs=["maxpool1_maxpool/MaxPool2D:0", "weights_2"],
+                outputs=["output_0"],
+                name="conv2_conv/Conv2D",
+                dilations=[1, 1],
+                group=1,
+                kernel_shape=[3, 3],
+                pads=[1, 1, 1, 1],
+                strides=[1, 1],
+            ),
+        ]
+    )
 
     # Create the ONNX initializers
     initializers = [
@@ -665,6 +680,15 @@ def build_conv_act_pool_model():
             vals=np.random.uniform(low=0.5, high=1.0, size=128 * 128 * 3 * 3),
         ),
     ]
+    if include_reshape_node:
+        initializers.append(
+            helper.make_tensor(
+                name="shape_1",
+                data_type=onnx.TensorProto.INT64,
+                dims=(4,),
+                vals=(32, 128, 256, 256),
+            ),
+        )
 
     # Create the ONNX graph with the nodes and initializers
     graph = helper.make_graph(nodes, "conv_act_pool", inputs, outputs, initializer=initializers)
diff --git a/tests/unit/onnx/test_quantize_int8.py b/tests/unit/onnx/test_quantize_int8.py
@@ -97,9 +97,10 @@ def test_convtranspose_conv_residual_int8(tmp_path):
         )
 
 
-def test_conv_act_pool_int8(tmp_path):
-    onnx_model = build_conv_act_pool_model()
-    onnx_path = os.path.join(tmp_path, "conv_act_pool_model.onnx")
+@pytest.mark.parametrize("include_reshape_node", [False, True])
+def test_conv_act_pool_int8(tmp_path, include_reshape_node):
+    onnx_model = build_conv_act_pool_model(include_reshape_node)
+    onnx_path = os.path.join(tmp_path, f"conv_act_pool_model_{include_reshape_node}.onnx")
     save_onnx(onnx_model, onnx_path)
 
     moq.quantize(onnx_path, quantize_mode="int8", high_precision_dtype="fp16")