|
20 | 20 | from functools import partial |
21 | 21 |
|
22 | 22 | import torch |
23 | | -from _test_utils.import_helper import skip_if_no_libcudnn, skip_if_onnx_version_above_1_18 |
| 23 | +from _test_utils.import_helper import skip_if_no_libcudnn |
24 | 24 | from _test_utils.onnx_quantization.lib_test_models import SimpleMLP, export_as_onnx, find_init |
25 | 25 | from _test_utils.torch_quantization.quantize_common import get_awq_config |
26 | 26 |
|
|
39 | 39 | # test_qdq_utils_fp8.py::test_fused_q[bf16,fp16] fails if this script runs after the int4 test, but not before. |
40 | 40 |
|
41 | 41 |
|
42 | | -def test_int4_awq(tmp_path): |
43 | | - skip_if_onnx_version_above_1_18() |
| 42 | +def test_safe_cupy_array(monkeypatch): |
| 43 | + """Comprehensive test for safe_cupy_array covering all code paths.""" |
| 44 | + import builtins |
| 45 | + |
| 46 | + import numpy # Import actual numpy for creating int4 tensors |
| 47 | + |
| 48 | + # Test 1: Regular numpy array (should hit line 122) |
| 49 | + result = int4.safe_cupy_array(numpy.array([1, 2, 3, 4], dtype=numpy.float32)) |
| 50 | + assert isinstance(result, np.ndarray) |
| 51 | + |
| 52 | + # Test 2: With real ml_dtypes.int4 (covers lines 117-118) |
| 53 | + try: |
| 54 | + import ml_dtypes |
| 55 | + |
| 56 | + int4_tensor = numpy.array([1, 2, -3, 4], dtype=numpy.float32).astype(ml_dtypes.int4) |
| 57 | + result = int4.safe_cupy_array(int4_tensor) |
| 58 | + assert isinstance(result, np.ndarray) and result.dtype == numpy.int8 |
| 59 | + expected = int4_tensor.astype(numpy.int8) |
| 60 | + actual = result.get() if int4.has_cupy else result |
| 61 | + np.testing.assert_array_equal(actual, expected) |
| 62 | + except ImportError: |
| 63 | + pass # ml_dtypes not available |
| 64 | + |
| 65 | + # Test 3: When ml_dtypes import fails (covers ImportError catch and line 122) |
| 66 | + original_import = builtins.__import__ |
44 | 67 |
|
| 68 | + def mock_import(name, *args, **kwargs): |
| 69 | + if name == "ml_dtypes": |
| 70 | + raise ImportError("ml_dtypes not available") |
| 71 | + return original_import(name, *args, **kwargs) |
| 72 | + |
| 73 | + monkeypatch.setattr(builtins, "__import__", mock_import) |
| 74 | + |
| 75 | + # Use actual numpy for creating the array |
| 76 | + result = int4.safe_cupy_array(numpy.array([5, 6, 7, 8], dtype=numpy.int8)) |
| 77 | + assert isinstance(result, np.ndarray) |
| 78 | + |
| 79 | + |
| 80 | +def test_int4_awq(tmp_path): |
45 | 81 | def _forward_loop(model, dataloader): |
46 | 82 | """Forward loop for calibration.""" |
47 | 83 | for data in dataloader: |
@@ -94,20 +130,19 @@ def _forward_loop(model, dataloader): |
94 | 130 | scale_awq_lite = find_init(onnx_model_awq_lite, scale_names[i]) |
95 | 131 |
|
96 | 132 | if int4.has_cupy: |
97 | | - wq_onnx_awq_lite = np.array(wq_onnx_awq_lite) |
98 | | - scale_awq_lite = np.array(scale_awq_lite) |
| 133 | + wq_onnx_awq_lite = int4.safe_cupy_array(wq_onnx_awq_lite) |
| 134 | + scale_awq_lite = int4.safe_cupy_array(scale_awq_lite) |
99 | 135 |
|
100 | 136 | wq_onnx_awq_lite = dq_tensor(wq_onnx_awq_lite, scale_awq_lite, block_size) |
101 | | - |
102 | 137 | wq_torch_awq_clip = model_torch_copy.net[i * 2].weight_quantizer( |
103 | 138 | model_torch_copy.net[i * 2].weight |
104 | 139 | ) |
105 | 140 | wq_onnx_awq_clip = find_init(onnx_model_awq_clip, wq_names[i]) |
106 | 141 | scale_awq_clip = find_init(onnx_model_awq_clip, scale_names[i]) |
107 | 142 |
|
108 | 143 | if int4.has_cupy: |
109 | | - wq_onnx_awq_clip = np.array(wq_onnx_awq_clip) |
110 | | - scale_awq_clip = np.array(scale_awq_clip) |
| 144 | + wq_onnx_awq_clip = int4.safe_cupy_array(wq_onnx_awq_clip) |
| 145 | + scale_awq_clip = int4.safe_cupy_array(scale_awq_clip) |
111 | 146 |
|
112 | 147 | wq_onnx_awq_clip = dq_tensor(wq_onnx_awq_clip, scale_awq_clip, block_size) |
113 | 148 |
|
|
0 commit comments