Commit d79376c
Remove old change_linear_weights_to_* APIs
**Summary:** This commit removes these super old quantization APIs that aren't even accessible by the user:

```
change_linear_weights_to_int8_dqtensors
change_linear_weights_to_int8_woqtensors
change_linear_weights_to_int4_woqtensors
```

**Test Plan:** CI

ghstack-source-id: d370c59
Pull Request resolved: #2721
1 parent 545858c commit d79376c
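For reference, the README diff below spells out the config-based replacements for the removed functions. The following migration sketch is assembled from those README snippets; the stand-in model, the `fresh()` helper, and the int4 device guard are assumptions for illustration, not part of this commit:

```python
import torch
import torch.nn as nn

from torchao.quantization import (
    Int4WeightOnlyConfig,
    Int8DynamicActivationInt8WeightConfig,
    Int8WeightOnlyConfig,
    quantize_,
)


def fresh():
    # Hypothetical stand-in; any nn.Module containing Linear layers works.
    return nn.Sequential(nn.Linear(32, 32), nn.Linear(32, 8))


# change_linear_weights_to_int8_dqtensors(m)  ->  int8 dynamic activation + weight
quantize_(fresh(), Int8DynamicActivationInt8WeightConfig())

# change_linear_weights_to_int8_woqtensors(m)  ->  int8 weight-only
quantize_(fresh(), Int8WeightOnlyConfig())

# change_linear_weights_to_int4_woqtensors(m, groupsize=128)  ->  int4 weight-only
# (the int4 kernel targets bfloat16 models on CUDA, so guard accordingly)
if torch.cuda.is_available():
    quantize_(fresh().to(torch.bfloat16).cuda(), Int4WeightOnlyConfig(group_size=128))
```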

5 files changed: +3 −151 lines

benchmarks/benchmark_aq.py

Lines changed: 3 additions & 1 deletion

````diff
@@ -75,11 +75,13 @@ def _ref_change_linear_weights_to_int8_dqtensors(model, filter_fn=None, **kwargs
     """
     from torchao.quantization.quant_api import (
         _get_subclass_inserter,
-        _in_features_greater_than_16,
         _is_linear,
     )
     from torchao.quantization.subclass import Int8DynamicallyQuantizedLinearWeight

+    def _in_features_greater_than_16(mod, *args):
+        return hasattr(mod, "in_features") and mod.in_features > 16
+
     if filter_fn is None:
         filter_fn = lambda *args: _is_linear(*args) and _in_features_greater_than_16(
             *args
````
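The benchmark previously imported `_in_features_greater_than_16` from `quant_api`; since the commit deletes that helper there, the benchmark inlines its own copy. A minimal sketch of what the inlined filter accepts, assuming only `torch` (the `in_features > 16` cutoff presumably keeps int8 dynamic quant off layers too narrow to benefit):

```python
import torch.nn as nn


def _in_features_greater_than_16(mod, *args):
    return hasattr(mod, "in_features") and mod.in_features > 16


assert _in_features_greater_than_16(nn.Linear(32, 8))      # wide enough
assert not _in_features_greater_than_16(nn.Linear(16, 8))  # 16 itself is excluded
assert not _in_features_greater_than_16(nn.ReLU())         # no in_features attribute
```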

test/integration/test_integration.py

Lines changed: 0 additions & 6 deletions

````diff
@@ -40,7 +40,6 @@
 from torchao.quantization.quant_api import (
     Float8DynamicActivationFloat8WeightConfig,
     _replace_with_custom_fn_if_matches_filter,
-    change_linear_weights_to_int8_dqtensors,
     int4_weight_only,
     int8_dynamic_activation_int4_weight,
     int8_dynamic_activation_int8_weight,
@@ -1829,11 +1828,6 @@ class TestAOTI(unittest.TestCase):
         list(itertools.product(TENSOR_SUBCLASS_APIS, COMMON_DEVICES, COMMON_DTYPES)),
     )
     def test_aoti(self, api, test_device, test_dtype):
-        if api is change_linear_weights_to_int8_dqtensors and test_device == "cuda":
-            self.skipTest(
-                f"{api} in {test_device} is not support for aoti compilation yet"
-            )
-
         if (
             test_device == "cuda"
             and torch.cuda.is_available()
````
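With the deprecated API gone from `TENSOR_SUBCLASS_APIS`, the CUDA skip above is dead weight, so the whole API × device × dtype grid now runs through AOTI. A simplified sketch of the parametrization pattern in play; the fixture values here are illustrative stand-ins, not the test file's real constants:

```python
import itertools
import unittest

from parameterized import parameterized

# Illustrative stand-ins for the real module-level fixtures.
TENSOR_SUBCLASS_APIS = ["int8_weight_only", "int8_dynamic_activation_int8_weight"]
COMMON_DEVICES = ["cpu", "cuda"]
COMMON_DTYPES = ["bfloat16"]


class TestAOTI(unittest.TestCase):
    @parameterized.expand(
        list(itertools.product(TENSOR_SUBCLASS_APIS, COMMON_DEVICES, COMMON_DTYPES))
    )
    def test_aoti(self, api, test_device, test_dtype):
        # After this commit no (api, device) pair is skipped up front;
        # device availability is the only remaining gate.
        ...
```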

test/quantization/test_quant_api.py

Lines changed: 0 additions & 26 deletions

````diff
@@ -146,32 +146,6 @@ def forward(self, x):
         return x


-def _ref_change_linear_weights_to_int8_dqtensors(model, filter_fn=None, **kwargs):
-    """
-    The deprecated implementation for int8 dynamic quant API, used as a reference for
-    numerics and performance
-    """
-    from torchao.quantization.quant_api import (
-        _get_subclass_inserter,
-        _in_features_greater_than_16,
-        _is_linear,
-    )
-    from torchao.quantization.subclass import Int8DynamicallyQuantizedLinearWeight
-
-    if filter_fn is None:
-        filter_fn = lambda *args: _is_linear(*args) and _in_features_greater_than_16(
-            *args
-        )
-
-    _replace_with_custom_fn_if_matches_filter(
-        model,
-        _get_subclass_inserter(
-            Int8DynamicallyQuantizedLinearWeight, enable_parametrization=False, **kwargs
-        ),
-        filter_fn,
-    )
-
-
 def _get_ref_change_linear_weights_to_woqtensors(deprecated_tenosr_subclass):
     def _ref_change_linear_weights_to_woqtensors(model, filter_fn=None, **kwargs):
         """
````

torchao/quantization/README.md

Lines changed: 0 additions & 15 deletions

````diff
@@ -125,44 +125,29 @@ be applied individually. While there are a large variety of quantization apis, t
 #### A16W4 WeightOnly Quantization

 ```python
-# for torch 2.4+
 from torchao.quantization import quantize_, Int4WeightOnlyConfig
 group_size = 32

 # you can enable [hqq](https://github.com/mobiusml/hqq/tree/master) quantization which is expected to improves accuracy through
 # use_hqq flag for `Int4WeightOnlyConfig` quantization
 use_hqq = False
 quantize_(model, Int4WeightOnlyConfig(group_size=group_size, use_hqq=use_hqq))
-
-# for torch 2.2.2 and 2.3
-from torchao.quantization.quant_api import change_linear_weights_to_int4_woqtensors
-change_linear_weights_to_int4_woqtensors(model)
 ```

 Note: The quantization error incurred by applying int4 quantization to your model can be fairly significant, so using external techniques like GPTQ may be necessary to obtain a usable model.

 #### A16W8 Int8 WeightOnly Quantization

 ```python
-# for torch 2.4+
 from torchao.quantization import quantize_, Int8WeightOnlyConfig
 quantize_(model, Int8WeightOnlyConfig())
-
-# for torch 2.2.2 and 2.3
-from torchao.quantization.quant_api import change_linear_weights_to_int8_woqtensors
-change_linear_weights_to_int8_woqtensors(model)
 ```

 #### A8W8 Int8 Dynamic Quantization

 ```python
-# for torch 2.4+
 from torchao.quantization import quantize_, Int8DynamicActivationInt8WeightConfig
 quantize_(model, Int8DynamicActivationInt8WeightConfig())
-
-# for torch 2.2.2 and 2.3
-from torchao.quantization.quant_api import change_linear_weights_to_int8_dqtensors
-change_linear_weights_to_int8_dqtensors(model)
 ```

 ### A16W8 Float8 WeightOnly Quantization
````

torchao/quantization/quant_api.py

Lines changed: 0 additions & 103 deletions

````diff
@@ -172,109 +172,6 @@
 }


-######
-# TO BE DEPRECATED START
-######
-def _in_features_greater_than_16(mod, *args):
-    return hasattr(mod, "in_features") and mod.in_features > 16
-
-
-# TODO: delete
-def change_linear_weights_to_int8_dqtensors(model, filter_fn=None, **kwargs):
-    """
-    Converts all linear weight tensors to the `Int8DynamicallyQuantizedLinearWeight`
-    Tensor subclass, effectively applying the same form of quantization
-    as apply_dynamic_quant while not modifying the linear modules.
-    """
-    raise ImportError(
-        "This API is deprecated for pytorch 2.4+, please checkout quantization/README.md for most up to date APIs"
-    )
-
-    if filter_fn is None:
-        filter_fn = lambda *args: _is_linear(*args) and _in_features_greater_than_16(
-            *args
-        )
-
-    _replace_with_custom_fn_if_matches_filter(
-        model,
-        _get_subclass_inserter(
-            Int8DynamicallyQuantizedLinearWeight, enable_parametrization=False, **kwargs
-        ),
-        filter_fn,
-    )
-
-
-# TODO: delete
-def change_linear_weights_to_int8_woqtensors(model, filter_fn=None, **kwargs):
-    """
-    Converts all linear weight tensors to the
-    `Int8WeightOnlyQuantizedLinearWeight` tensor subclass,
-    effectively applying the same form of quantization
-    as apply_weight_only_int8_quant while not modifying the linear modules.
-    """
-    raise ImportError(
-        "This API is deprecated for pytorch 2.4+, please checkout quantization/README.md for most up to date APIs"
-    )
-
-    _replace_with_custom_fn_if_matches_filter(
-        model,
-        _get_subclass_inserter(
-            Int8WeightOnlyQuantizedLinearWeight, enable_parametrization=False, **kwargs
-        ),
-        _is_linear if filter_fn is None else filter_fn,
-    )
-
-
-# TODO: delete
-def change_linear_weights_to_int4_woqtensors(
-    model,
-    groupsize=128,
-    inner_k_tiles=8,
-    filter_fn=None,
-    zero_point_domain=ZeroPointDomain.FLOAT,
-    preserve_zero=False,
-):
-    """
-    Converts all linear weight tensors to the
-    `Int4WeightOnlyQuantizedLinearWeight` tensor subclass,
-    effectively applying the same form of quantization
-    as apply_dynamic_quant while not modifying the linear modules.
-    Args:
-        `groupsize`: parameter for quantization, controls the granularity of quantization, smaller
-         size is more fine grained, choices are [256, 128, 64, 32]
-        `inner_k_tiles`: parameter for int4 mm kernel, choices are [8, 4, 2]
-        `filter_fn`: function that takes a nn.Module instance and fully qualified name of the module, \
-         returns True if we want to run `config` on
-        `zero_point_domain`: data type of zeros points, choices are [ZeroPointDomain.FLOAT, \
-         ZeroPointDomain.INT, ZeroPointDomain.NONE]
-        `preserve_zero`: whether to preserve zero, default is False
-    """
-    raise ImportError(
-        "This API is deprecated for pytorch 2.4+, please checkout quantization/README.md for most up to date APIs"
-    )
-
-    if filter_fn is None:
-        filter_fn = _is_linear
-
-    _replace_with_custom_fn_if_matches_filter(
-        model,
-        _get_subclass_inserter(
-            Int4WeightOnlyQuantizedLinearWeight,
-            enable_parametrization=False,
-            groupsize=groupsize,
-            inner_k_tiles=inner_k_tiles,
-            zero_point_domain=zero_point_domain,
-            preserve_zero=preserve_zero,
-        ),
-        filter_fn,
-    )
-
-
-########
-# TO BE DEPRECATED END
-########
-
-
 def _replace_with_custom_fn_if_matches_filter(
     model,
     replacement_fn,
````
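Worth noting: each removed function raised `ImportError` at entry, so the replacement logic after the `raise` was already unreachable, and deleting it changes nothing for callers at runtime. A small sketch of the user-visible contract after this commit:

```python
# Before this commit, calling the stub raised ImportError; after it, the
# import itself fails. Either way, torch 2.4+ users see the same error class.
try:
    from torchao.quantization.quant_api import (
        change_linear_weights_to_int8_dqtensors,
    )
except ImportError:
    print("use quantize_(model, Int8DynamicActivationInt8WeightConfig()) instead")
```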
