Skip to content

Commit 27b11c8

Browse files
committed
INT4 ONNX Version Fix: Code Quality Improvements
Signed-off-by: Hrishith Thadicherla <[email protected]>
1 parent e09ec22 commit 27b11c8

File tree

2 files changed

+7
-11
lines changed

2 files changed

+7
-11
lines changed

modelopt/onnx/quantization/int4.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -98,21 +98,20 @@
9898
# supported and working
9999
CLIP_MIN = 1e-5
100100

101-
def safe_cupy_array(tensor):
102-
"""
103-
Convert ml_dtypes.int4 tensor to numpy.int8 for CuPy compatibility.
101+
def safe_cupy_array(tensor):
102+
"""Convert ml_dtypes.int4 tensor to numpy.int8 for CuPy compatibility.
104103
105104
In ONNX 1.19, int4 tensors use ml_dtypes.int4 which CuPy doesn't support.
106105
This function converts them to regular numpy.int8 while preserving values.
107-
108106
Args:
109-
tensor: numpy array that may have ml_dtypes.int4 dtype
110-
107+
tensor: numpy array that may have ml_dtypes.int4 dtype
111108
Returns:
112-
cupy or numpy array (if cupy is not supported) with numpy.int8 dtype if input was ml_dtypes.int4, otherwise unchanged
109+
cupy or numpy array (if cupy is not supported) with numpy.int8 dtype if input was ml_dtypes.int4,
110+
otherwise unchanged
113111
"""
114112
try:
115113
import ml_dtypes
114+
116115
if hasattr(tensor, 'dtype') and tensor.dtype == ml_dtypes.int4:
117116
return np.asarray(tensor.astype(numpy.int8))
118117
except ImportError:

tests/gpu/onnx/test_quantize_onnx_torch_int4_awq.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from functools import partial
2121

2222
import torch
23-
from _test_utils.import_helper import skip_if_no_libcudnn, skip_if_onnx_version_above_1_18
23+
from _test_utils.import_helper import skip_if_no_libcudnn
2424
from _test_utils.onnx_quantization.lib_test_models import SimpleMLP, export_as_onnx, find_init
2525
from _test_utils.torch_quantization.quantize_common import get_awq_config
2626

@@ -40,8 +40,6 @@
4040

4141

4242
def test_int4_awq(tmp_path):
43-
# skip_if_onnx_version_above_1_18()
44-
4543
def _forward_loop(model, dataloader):
4644
"""Forward loop for calibration."""
4745
for data in dataloader:
@@ -115,7 +113,6 @@ def _forward_loop(model, dataloader):
115113

116114

117115
def test_int4_awq_cuda(tmp_path):
118-
# skip_if_onnx_version_above_1_18()
119116
skip_if_no_libcudnn()
120117
block_size = 128
121118

0 commit comments

Comments (0)