Skip to content

Commit f6734ec

Browse files
committed
ONNX 1.19 fix: changed the IR version to 10 for compatibility with onnxruntime, and added the axis and block_size attributes to the DQ node
Signed-off-by: Hrishith Thadicherla <[email protected]>
1 parent 843cf44 commit f6734ec

File tree

2 files changed

+7
-1
lines changed

2 files changed

+7
-1
lines changed

modelopt/onnx/quantization/gs_patching.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,7 @@ def _make_constant(
4848
setattr(t, "explicit_dtype", dtype)
4949
return t
5050

51+
5152
def _make_variable(
5253
name: str, dtype: onnx.TensorProto.DataType, shape: Sequence[int | str]
5354
) -> gs.Constant:

modelopt/onnx/quantization/int4.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -98,6 +98,7 @@
9898
# supported and working
9999
CLIP_MIN = 1e-5
100100

101+
101102
def safe_cupy_array(tensor):
102103
"""Convert ml_dtypes.int4 tensor to numpy.int8 for CuPy compatibility.
103104
@@ -304,7 +305,10 @@ def quantize_rtn(
304305

305306
if gather_w_map is not None:
306307
assert gather_s_map is not None, "scale-map not found for quantizable gather nodes"
307-
gather_dq_node_attributes = {"axis": gather_quantize_axis, "block_size": gather_block_size}
308+
gather_dq_node_attributes = {
309+
"axis": gather_quantize_axis,
310+
"block_size": gather_block_size,
311+
}
308312
qdq.insert_dq_nodes(
309313
graph,
310314
gather_s_map,
@@ -330,6 +334,7 @@ def quantize_rtn(
330334

331335
return model
332336

337+
333338
class AWQClipHelper:
334339
"""AWQ calibration helper class."""
335340

0 commit comments

Comments
 (0)