[QNN-EP] Replace Upsample with Resize during Quantization (microsoft#24896)

chuteng-quic · qti-chuteng · web-flow · commit 3ca8a49f235f · 2025-06-02T12:41:35.000-07:00
### Description
Replace Upsample with Resize during quantization to avoid producing an invalid graph.

### Motivation and Context
During quantization, if the opset of the original ONNX model is less than 10, the opset of the QDQ model is upgraded to 11.
However, Upsample is deprecated by opset 11, so keeping it would make the ONNX model invalid.
Therefore, we replace Upsample with Resize whenever the opset needs to be upgraded to 11.

---------

Co-authored-by: chuteng <chuteng@qti.qualcomm.com>
diff --git a/onnxruntime/python/tools/quantization/fusions/__init__.py b/onnxruntime/python/tools/quantization/fusions/__init__.py
@@ -1,3 +1,4 @@
 from .fusion import Fusion  # noqa: F401
 from .fusion_gelu import FusionGelu  # noqa: F401
 from .fusion_layernorm import FusionLayerNormalization  # noqa: F401
+from .replace_upsample_with_resize import ReplaceUpsampleWithResize  # noqa: F401
diff --git a/onnxruntime/python/tools/quantization/fusions/replace_upsample_with_resize.py b/onnxruntime/python/tools/quantization/fusions/replace_upsample_with_resize.py
@@ -0,0 +1,96 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+from __future__ import annotations
+
+import numpy as np
+import onnx
+
+from ..onnx_model import ONNXModel
+from .fusion import Fusion
+
+
+class ReplaceUpsampleWithResize(Fusion):
+    """Replace Upsample with Resize."""
+
+    def __init__(self, model: ONNXModel, opset):
+        """Initialize."""
+        super().__init__(model, "Resize", "Upsample")
+        self.opset = opset
+
+    def fuse(
+        self,
+        node: onnx.NodeProto,
+        input_name_to_nodes: dict[str, list[onnx.NodeProto]],
+        output_name_to_node: dict[str, onnx.NodeProto],
+    ):
+        """Replace Upsample with Resize."""
+        mode = None
+        for attr in node.attribute:
+            if attr.name == "mode":
+                mode = attr.s.decode("utf-8")
+                break
+
+        scales_input = None
+        if self.opset > 7:
+            scales_input = node.input[1] if len(node.input) > 1 else ""
+            resize_inputs = [node.input[0], node.name + "_roi", scales_input]
+        else:
+            if self.opset == 7:
+                for attr in node.attribute:
+                    if attr.name == "scales":
+                        scales_input = attr.floats
+                        break
+
+                scales_input = np.array(list(scales_input), np.float32)
+            else:
+                h_scale = 1
+                w_scale = 1
+                for attr in node.attribute:
+                    if attr.name == "height_scale":
+                        h_scale = attr.float
+                    elif attr.name == "width_scale":
+                        w_scale = attr.float
+
+                scales_input = np.array([1, 1, h_scale, w_scale], np.float32)
+
+            scales_tensor = onnx.helper.make_tensor(
+                name=node.name + "_scales",
+                data_type=onnx.TensorProto.FLOAT,
+                dims=scales_input.shape,
+                vals=scales_input.flatten().tolist(),
+            )
+
+            scales_node = onnx.helper.make_node(
+                "Constant", inputs=[], outputs=[node.name + "_scales"], value=scales_tensor
+            )
+
+            self.nodes_to_add.append(scales_node)
+
+            resize_inputs = [node.input[0], node.name + "_roi", node.name + "_scales"]
+
+        roi_tensor = onnx.helper.make_tensor(
+            name=node.name + "_roi",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(len(scales_input) * 2,),
+            vals=[0] * len(scales_input) + [1] * len(scales_input),
+        )
+
+        roi_node = onnx.helper.make_node("Constant", inputs=[], outputs=[node.name + "_roi"], value=roi_tensor)
+
+        resize_node = onnx.helper.make_node(
+            op_type="Resize", inputs=resize_inputs, outputs=node.output, mode=mode, nearest_mode="floor"
+        )
+
+        self.nodes_to_remove.append(node)
+        self.nodes_to_add.append(roi_node)
+        self.nodes_to_add.append(resize_node)
+
+    def apply(self) -> bool:
+        """Apply."""
+        if super().apply():
+            self.model.topological_sort()
+            return True
+        return False
diff --git a/onnxruntime/python/tools/quantization/shape_inference.py b/onnxruntime/python/tools/quantization/shape_inference.py
@@ -16,7 +16,9 @@
 from onnxruntime.tools.symbolic_shape_infer import SymbolicShapeInference
 from onnxruntime.transformers.onnx_utils import extract_raw_data_from_model, has_external_data
 
-from .quant_utils import add_pre_process_metadata
+from .fusions import ReplaceUpsampleWithResize
+from .onnx_model import ONNXModel
+from .quant_utils import add_pre_process_metadata, save_and_reload_model_with_shape_infer
 
 logger = logging.getLogger(__name__)
 
@@ -85,6 +87,21 @@ def quant_pre_process(
                 verbose,
             )
 
+        # Since Upsample is deprecated after opset v10, and the model's opset will
+        # be upgraded to at least v11 during quantization, we need to replace Upsample
+        # with Resize first to avoid generating an invalid model.
+        if model:
+            ai_onnx_domain = [opset for opset in model.opset_import if not opset.domain or opset.domain == "ai.onnx"]
+            if len(ai_onnx_domain) == 1:
+                opset_version = ai_onnx_domain[0].version
+                if opset_version < 10:
+                    ReplaceUpsampleWithResize(ONNXModel(model), opset_version).apply()
+                    model.opset_import.remove(ai_onnx_domain[0])
+                    opset_version = 11
+                    model.opset_import.extend([onnx.helper.make_opsetid("", opset_version)])
+                    model = onnx.version_converter.convert_version(model, opset_version)
+                    model = save_and_reload_model_with_shape_infer(model)
+
         if not skip_optimization:
             # Use ORT optimizers (native code) to optimize model
             if not skip_symbolic_shape: