From 57b24fac8a6c0c4a27369c5eaa4d17a2b91d4811 Mon Sep 17 00:00:00 2001 From: gcunhase <4861122+gcunhase@users.noreply.github.com> Date: Mon, 20 Oct 2025 18:09:22 -0400 Subject: [PATCH 1/4] Fix for 'SymbolicShapeInference' error Signed-off-by: gcunhase <4861122+gcunhase@users.noreply.github.com> --- modelopt/onnx/quantization/graph_utils.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/modelopt/onnx/quantization/graph_utils.py b/modelopt/onnx/quantization/graph_utils.py index e10c001b4..93cfc472b 100755 --- a/modelopt/onnx/quantization/graph_utils.py +++ b/modelopt/onnx/quantization/graph_utils.py @@ -27,7 +27,6 @@ from onnx_graphsurgeon.ir.node import Node from onnx_graphsurgeon.ir.tensor import Constant, Tensor, Variable from onnxruntime.quantization.calibrate import CalibrationDataReader -from onnxruntime.tools.symbolic_shape_infer import SymbolicShapeInference from modelopt.onnx.logging_config import logger from modelopt.onnx.op_types import is_copy_op, is_linear_op @@ -966,7 +965,7 @@ def find_nodes_from_matmul_to_exclude( logger.debug(f"Found {len(matmul_nodes)} MatMul nodes to analyze") if calibration_shapes: - nodes_to_exclude = _exclude_matmuls_by_symbolic_inference( + nodes_to_exclude = _exclude_matmuls_by_shape_inference( model, matmul_nodes, calibration_shapes ) else: @@ -1058,10 +1057,10 @@ def find_nodes_from_convs_to_exclude(graph: Graph, quantize_mode: str = "int8"): return unsupported_conv_nodes -def _exclude_matmuls_by_symbolic_inference( +def _exclude_matmuls_by_shape_inference( model: onnx.ModelProto, matmul_nodes: list, calibration_shapes: str | dict | None = None ) -> list[str]: - """Use symbolic shape inference to find MatMuls with dimension 1.""" + """Use shape inference to find MatMuls with dimension 1.""" # Prepare model for symbolic inference for graph_input in model.graph.input: for dim in graph_input.type.tensor_type.shape.dim: @@ -1070,11 +1069,13 @@ def 
_exclude_matmuls_by_symbolic_inference( dim.dim_value = 1 # Apply calibration shapes if provided - input_shapes = ( - parse_shapes_spec(calibration_shapes) - if (calibration_shapes and isinstance(calibration_shapes, str)) - else {} - ) + input_shapes = {} + if calibration_shapes: + input_shapes = ( + parse_shapes_spec(calibration_shapes) + if isinstance(calibration_shapes, str) + else calibration_shapes + ) for graph_input in model.graph.input: if graph_input.name in input_shapes: input_shape = input_shapes[graph_input.name] @@ -1087,8 +1088,7 @@ def _exclude_matmuls_by_symbolic_inference( for dim, new_dim_value in zip(tensor_shape, input_shape): dim.dim_value = new_dim_value - model.graph.ClearField("value_info") - model = SymbolicShapeInference.infer_shapes(model) + model = onnx.shape_inference.infer_shapes(model) value_info_map = {vi.name: vi for vi in model.graph.value_info} nodes_to_exclude = [] From 543c2ca829926b1fca18bf54b36d0869d58cdf57 Mon Sep 17 00:00:00 2001 From: gcunhase <4861122+gcunhase@users.noreply.github.com> Date: Mon, 20 Oct 2025 18:14:58 -0400 Subject: [PATCH 2/4] Replaced onnx.shape_inference.infer_shapes with utils function Signed-off-by: gcunhase <4861122+gcunhase@users.noreply.github.com> --- modelopt/onnx/quantization/graph_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modelopt/onnx/quantization/graph_utils.py b/modelopt/onnx/quantization/graph_utils.py index 93cfc472b..85c30c79c 100755 --- a/modelopt/onnx/quantization/graph_utils.py +++ b/modelopt/onnx/quantization/graph_utils.py @@ -35,6 +35,7 @@ find_lowest_common_ancestor, get_child_nodes, get_parent_nodes, + infer_shapes, parse_shapes_spec, save_onnx, ) @@ -1088,7 +1089,7 @@ def _exclude_matmuls_by_shape_inference( for dim, new_dim_value in zip(tensor_shape, input_shape): dim.dim_value = new_dim_value - model = onnx.shape_inference.infer_shapes(model) + model = infer_shapes(model) value_info_map = {vi.name: vi for vi in model.graph.value_info} 
nodes_to_exclude = [] From a5349330ef8be42743de60d9676314cc98dad161 Mon Sep 17 00:00:00 2001 From: gcunhase <4861122+gcunhase@users.noreply.github.com> Date: Mon, 20 Oct 2025 22:45:46 -0400 Subject: [PATCH 3/4] Fix: add output info to value_info_map Signed-off-by: gcunhase <4861122+gcunhase@users.noreply.github.com> --- modelopt/onnx/quantization/graph_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modelopt/onnx/quantization/graph_utils.py b/modelopt/onnx/quantization/graph_utils.py index 85c30c79c..3871328db 100755 --- a/modelopt/onnx/quantization/graph_utils.py +++ b/modelopt/onnx/quantization/graph_utils.py @@ -1091,6 +1091,7 @@ def _exclude_matmuls_by_shape_inference( model = infer_shapes(model) value_info_map = {vi.name: vi for vi in model.graph.value_info} + value_info_map.update({vi.name: vi for vi in model.graph.output}) nodes_to_exclude = [] for matmul_node in matmul_nodes: From 663d9f038073c0f19dc4ef641a2e1fa815d4c121 Mon Sep 17 00:00:00 2001 From: gcunhase <4861122+gcunhase@users.noreply.github.com> Date: Mon, 20 Oct 2025 22:46:29 -0400 Subject: [PATCH 4/4] Ensure calibration_shapes is populated if calibrate_per_node is enabled Signed-off-by: gcunhase <4861122+gcunhase@users.noreply.github.com> --- modelopt/onnx/quantization/quantize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelopt/onnx/quantization/quantize.py b/modelopt/onnx/quantization/quantize.py index 2d23b875a..9bc025e33 100755 --- a/modelopt/onnx/quantization/quantize.py +++ b/modelopt/onnx/quantization/quantize.py @@ -470,7 +470,7 @@ def quantize( calibration_eps, ) - if not calibration_shapes: + if calibrate_per_node and not calibration_shapes: calibration_shapes = get_input_shapes(onnx_path) if quantize_mode in ["fp8", "int8"]: