ONNX 1.19 fix: set IR version to 10 for onnxruntime compatibility and add axis and block_size attributes to the DQ node

hthadicherla · hthadicherla · commit f6734ec55d9e · 2025-10-10T16:37:06.000+05:30
Signed-off-by: Hrishith Thadicherla <hthadicherla@nvidia.com>
diff --git a/modelopt/onnx/quantization/gs_patching.py b/modelopt/onnx/quantization/gs_patching.py
@@ -48,6 +48,7 @@ def _make_constant(
     setattr(t, "explicit_dtype", dtype)
     return t
 
+
 def _make_variable(
     name: str, dtype: onnx.TensorProto.DataType, shape: Sequence[int | str]
 ) -> gs.Constant:
diff --git a/modelopt/onnx/quantization/int4.py b/modelopt/onnx/quantization/int4.py
@@ -98,6 +98,7 @@
 # supported and working
 CLIP_MIN = 1e-5
 
+
 def safe_cupy_array(tensor):
     """Convert ml_dtypes.int4 tensor to numpy.int8 for CuPy compatibility.
 
@@ -304,7 +305,10 @@ def quantize_rtn(
 
         if gather_w_map is not None:
             assert gather_s_map is not None, "scale-map not found for quantizable gather nodes"
-            gather_dq_node_attributes = {"axis": gather_quantize_axis, "block_size": gather_block_size}
+            gather_dq_node_attributes = {
+                "axis": gather_quantize_axis,
+                "block_size": gather_block_size,
+            }
             qdq.insert_dq_nodes(
                 graph,
                 gather_s_map,
@@ -330,6 +334,7 @@ def quantize_rtn(
 
     return model
 
+
 class AWQClipHelper:
     """AWQ calibration helper class."""