@@ -26,6 +26,7 @@
    QuantizationType,
)
from compressed_tensors.quantization.quant_scheme import QuantizationScheme
+from compressed_tensors.utils import deprecated
from torch import FloatTensor, IntTensor, Tensor
from torch.nn import Module
from tqdm import tqdm
@@ -36,7 +37,6 @@
    "is_module_quantized",
    "is_model_quantized",
    "module_type",
-    "calculate_compression_ratio",
    "get_torch_bit_depth",
    "can_quantize",
    "parse_out_kv_cache_args",
@@ -276,12 +276,7 @@ def is_model_quantized(model: Module) -> bool:
    :param model: pytorch model
    :return: True if model is quantized, False otherwise
    """
-
-    for _, submodule in iter_named_leaf_modules(model):
-        if is_module_quantized(submodule):
-            return True
-
-    return False
+    return any(is_module_quantized(submodule) for submodule in model.modules())


def module_type(module: Module) -> str:
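The new one-liner walks every submodule via `model.modules()` and short-circuits on the first quantized one. A minimal sketch of the behavior, using a toy model and a stand-in for the real `is_module_quantized` check (the stub below is illustrative, not this library's implementation):

```python
from torch.nn import Linear, Module, Sequential


def is_module_quantized_stub(module: Module) -> bool:
    # Stand-in for the real helper: the library checks whether a
    # quantization scheme has been attached to the module.
    return getattr(module, "quantization_scheme", None) is not None


model = Sequential(Linear(4, 4), Linear(4, 2))
model[1].quantization_scheme = "fake-scheme"  # pretend one layer is quantized

# Mirrors the new is_model_quantized: any() stops at the first match,
# so large models exit early once a quantized module is found.
print(any(is_module_quantized_stub(m) for m in model.modules()))  # True
```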
@@ -294,6 +289,11 @@ def module_type(module: Module) -> str:
    return type(module).__name__


+@deprecated(
+    message="This function will be removed in a future release. "
+    "Please use `model.named_modules()` and filter by "
+    "compressed_tensors.InternalModule if necessary"
+)
def iter_named_leaf_modules(model: Module) -> Generator[Tuple[str, Module], None, None]:
    """
    Yields modules that do not have any submodules except observers. The observers
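For readers unfamiliar with the newly imported helper: a `deprecated(message=...)` decorator of this shape typically wraps the function and emits a `DeprecationWarning` when it is called. A generic sketch of the pattern (an illustration, not the actual `compressed_tensors.utils.deprecated` implementation):

```python
import warnings
from functools import wraps


def deprecated(message: str):
    # Returns a decorator that warns on every call, then delegates to
    # the wrapped function unchanged.
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            warnings.warn(message, DeprecationWarning, stacklevel=2)
            return func(*args, **kwargs)
        return wrapper
    return decorator
```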
@@ -320,6 +320,11 @@ def iter_named_leaf_modules(model: Module) -> Generator[Tuple[str, Module], None
            yield name, submodule


+@deprecated(
+    message="This function will be removed in a future release. "
+    "Please use `model.named_modules()` and filter by "
+    "compressed_tensors.InternalModule if necessary"
+)
def iter_named_quantizable_modules(
    model: Module,
    include_children: bool = True,
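The deprecation message points callers at `model.named_modules()` filtered by `compressed_tensors.InternalModule`. A sketch of that replacement pattern, assuming `InternalModule` is importable from `compressed_tensors` as the message suggests:

```python
from compressed_tensors import InternalModule  # import path per the message


def iter_named_user_modules(model):
    # Replacement for the deprecated iterators: walk all submodules and
    # skip the library-internal ones (e.g. observers).
    for name, module in model.named_modules():
        if isinstance(module, InternalModule):
            continue
        yield name, module
```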
@@ -330,7 +335,6 @@ def iter_named_quantizable_modules(
    Yield name and submodule of
    - leaf modules, set by include_children
    - attention modules, set by include_attn
-
    :param model: model to get leaf modules of
    :param include_children: flag to get the leaf modules
    :param include_attn: flag to get the attention modules
@@ -397,34 +401,6 @@ def can_quantize(value: torch.Tensor, quant_args: "QuantizationArgs") -> bool:
    return bit_depth > quant_args.num_bits


-def calculate_compression_ratio(model: Module) -> float:
-    """
-    Calculates the quantization compression ratio of a pytorch model, based on the
-    number of bits needed to represent the total weights in compressed form. Does not
-    take into account activation quantizatons.
-
-    :param model: pytorch module to calculate compression ratio for
-    :return: compression ratio of the whole model
-    """
-    total_compressed = 0.0
-    total_uncompressed = 0.0
-    for name, submodule in tqdm(
-        iter_named_leaf_modules(model),
-        desc="Calculating quantization compression ratio",
-    ):
-        for parameter in model.parameters():
-            uncompressed_bits = get_torch_bit_depth(parameter)
-            compressed_bits = uncompressed_bits
-            if is_module_quantized(submodule) and submodule.quantization_scheme.weights:
-                compressed_bits = submodule.quantization_scheme.weights.num_bits
-
-            num_weights = parameter.numel()
-            total_compressed += compressed_bits * num_weights
-            total_uncompressed += uncompressed_bits * num_weights
-
-    return total_uncompressed / total_compressed
-
-
def is_kv_cache_quant_scheme(scheme: QuantizationScheme) -> bool:
    """
    Check whether the QuantizationScheme targets the kv cache.