Commit c49380a

Fix line lengths
Signed-off-by: Fynn Schmitt-Ulms <[email protected]>
1 parent 38f8161 commit c49380a

11 files changed, +53 -48 lines changed

src/compressed_tensors/compressors/model_compressors/model_compressor.py (14 additions, 14 deletions)

```diff
@@ -562,11 +562,12 @@ def decompress(self, model_path: str, model: Module):
         :param model_path: path to compressed weights
         :param model: pytorch model to load decompressed weights into
 
-        Note: decompress makes use of both _replace_sparsity_weights and _replace_weights
-        The variations in these methods are a result of the subtle variations between the sparsity
-        and quantization compressors. Specifically, quantization compressors return not just the
-        decompressed weight, but the quantization parameters (e.g scales, zero_point) whereas sparsity
-        compressors only return the decompressed weight.
+        Note: decompress makes use of both _replace_sparsity_weights and
+        _replace_weights. The variations in these methods are a result of the subtle
+        variations between the sparsity and quantization compressors. Specifically,
+        quantization compressors return not just the decompressed weight, but the
+        quantization parameters (e.g scales, zero_point) whereas sparsity compressors
+        only return the decompressed weight.
 
         """
         model_path = get_safetensors_folder(model_path)
@@ -598,18 +599,17 @@ def decompress(self, model_path: str, model: Module):
             with override_quantization_status(
                 self.quantization_config, QuantizationStatus.FROZEN
             ):
-
                 names_to_scheme = apply_quantization_config(
                     model, self.quantization_config
                 )
                 # Load activation scales/zp or any other quantization parameters
-                # Conditionally load the weight quantization parameters if we have a dense compressor
-                # Or if a sparsity compressor has already been applied
+                # Conditionally load the weight quantization parameters if we have a
+                # dense compressor or if a sparsity compressor has already been applied
                 load_pretrained_quantization_parameters(
                     model,
                     model_path,
-                    # TODO: all weight quantization params will be moved to the compressor in a follow-up
-                    # including initialization
+                    # TODO: all weight quantization params will be moved to the
+                    # compressor in a follow-up including initialization
                     load_weight_quantization=(
                         sparse_decompressed
                         or isinstance(self.quantization_compressor, DenseCompressor)
@@ -695,7 +695,6 @@ def _replace_sparsity_weights(self, dense_weight_generator, model: Module):
         :param model: The model whose weights are to be updated.
         """
         for name, data in tqdm(dense_weight_generator, desc="Decompressing model"):
-
             split_name = name.split(".")
             prefix, param_name = ".".join(split_name[:-1]), split_name[-1]
             module = operator.attrgetter(prefix)(model)
@@ -731,9 +730,10 @@ def _replace_weights(self, dense_weight_generator, model: Module):
             for param_name, param_data in data.items():
                if hasattr(module, param_name):
                     # If compressed, will have an incorrect dtype for transformers >4.49
-                    # TODO: we can also just skip initialization of scales/zp if in decompression in init
-                    # to be consistent with loading which happens later as well
-                    # however, update_data does a good shape check - should be moved to the compressor
+                    # TODO: we can also just skip initialization of scales/zp if in
+                    # decompression in init to be consistent with loading which happens
+                    # later as well however, update_data does a good shape check -
+                    # should be moved to the compressor
                     if param_name == "weight":
                         delattr(module, param_name)
                     requires_grad = param_data.dtype in (
```

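As a rough illustration of the distinction the reflowed docstring draws (hypothetical layer names and shapes, not the library's actual generators): a sparsity decompressor yields only dense weights, while a quantization decompressor also yields the quantization parameters that accompany each weight.

```python
import torch

def sparsity_decompress_example():
    # sparsity compressors: yield the decompressed weight only
    yield "model.layers.0.mlp.down_proj.weight", torch.zeros(16, 16)

def quantization_decompress_example():
    # quantization compressors: yield the weight plus its quantization
    # parameters (e.g. scale and zero_point)
    yield "model.layers.0.mlp.down_proj", {
        "weight": torch.zeros(16, 16),
        "weight_scale": torch.ones(16, 1),
        "weight_zero_point": torch.zeros(16, 1, dtype=torch.int8),
    }
```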
src/compressed_tensors/compressors/quantized_compressors/base.py (2 additions, 1 deletion)

```diff
@@ -107,7 +107,8 @@ def compress(
                 compressed_dict[name] = value.to(compression_device)
                 continue
 
-            # compress values on meta if loading from meta otherwise on cpu (memory movement too expensive)
+            # compress values on meta if loading from meta otherwise on cpu (memory
+            # movement too expensive)
             module_path = prefix[:-1] if prefix.endswith(".") else prefix
             quant_args = names_to_scheme[module_path].weights
             compressed_values = self.compress_weight(
```

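A minimal sketch of the device choice the reflowed comment describes, assuming the only decision is meta vs. CPU; the helper name is illustrative and not part of the library:

```python
import torch

def pick_compression_device(value: torch.Tensor) -> torch.device:
    # stay on meta when weights were loaded on the meta device; otherwise
    # compress on CPU so tensors are not shuffled between devices
    if value.device.type == "meta":
        return torch.device("meta")
    return torch.device("cpu")
```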
src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py (3 additions, 2 deletions)

```diff
@@ -161,8 +161,9 @@ def unpack_fp4_from_uint8(
 ) -> torch.Tensor:
     """
     Unpacks uint8 values into fp4. Each uint8 consists of two fp4 values
-    (i.e. first four bits correspond to one fp4 value, last four corresond to a consecutive
-    fp4 value). The bits represent an index, which are mapped to an fp4 value.
+    (i.e. first four bits correspond to one fp4 value, last four correspond to a
+    consecutive fp4 value). The bits represent an index, which are mapped to an fp4
+    value.
 
     :param a: tensor to unpack
     :param m: original dim 0 size of the unpacked tensor
```

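To make the packing layout concrete, here is a hedged sketch of pulling two fp4 indices out of each uint8 (low nibble first) and mapping them through a value table; the e2m1 table below is a common choice and is an assumption, not necessarily the exact table this compressor uses.

```python
import torch

# candidate fp4 (e2m1) lookup table: index -> value, sign in the high bit
FP4_VALUES = torch.tensor(
    [0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0,
     -0.0, -0.5, -1.0, -1.5, -2.0, -3.0, -4.0, -6.0]
)

def unpack_fp4_pairs(packed: torch.Tensor) -> torch.Tensor:
    low = packed & 0x0F            # first fp4 index
    high = (packed >> 4) & 0x0F    # consecutive fp4 index
    idx = torch.stack([low, high], dim=-1).flatten(-2)  # interleave the pairs
    return FP4_VALUES[idx.long()]
```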
src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py (4 additions, 2 deletions)

```diff
@@ -135,7 +135,8 @@ def compress_weight(
         compressed_dict["weight_shape"] = weight_shape
         compressed_dict["weight_packed"] = packed_weight
 
-        # We typically don't compress zp; apart from when using the packed_compressor and when storing group/channel zp
+        # We typically don't compress zp; apart from when using the packed_compressor
+        # and when storing group/channel zp
         if not quantization_args.symmetric and quantization_args.strategy in [
             QuantizationStrategy.GROUP.value,
             QuantizationStrategy.CHANNEL.value,
@@ -166,7 +167,8 @@ def decompress_weight(
         num_bits = quantization_args.num_bits
         unpacked = unpack_from_int32(weight, num_bits, original_shape)
 
-        # NOTE: this will fail decompression as we don't currently handle packed zp on decompression
+        # NOTE: this will fail decompression as we don't currently handle packed zp on
+        # decompression
         if not quantization_args.symmetric and quantization_args.strategy in [
             QuantizationStrategy.GROUP.value,
             QuantizationStrategy.CHANNEL.value,
```

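For context, a minimal sketch of the condition both reflowed comments refer to, mirroring the check shown in the diff; the helper itself is illustrative, not part of the library:

```python
from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy

def stores_packed_zero_point(quantization_args: QuantizationArgs) -> bool:
    # zero points are only stored by the packed compressor for asymmetric
    # group- or channel-wise quantization
    return not quantization_args.symmetric and quantization_args.strategy in [
        QuantizationStrategy.GROUP.value,
        QuantizationStrategy.CHANNEL.value,
    ]
```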
src/compressed_tensors/quantization/lifecycle/apply.py (4 additions, 4 deletions)

```diff
@@ -73,14 +73,14 @@ def load_pretrained_quantization_parameters(
     Loads the quantization parameters (scale and zero point) from model_name_or_path to
     a model that has already been initialized with a quantization config.
 
-    NOTE: Will always load inputs/output parameters.
-    Will conditioanlly load weight parameters, if load_weight_quantization is set to True.
+    NOTE: Will always load inputs/output parameters. Will conditioanlly load weight
+    parameters, if load_weight_quantization is set to True.
 
     :param model: model to load pretrained quantization parameters to
     :param model_name_or_path: Hugging Face stub or local folder containing a quantized
         model, which is used to load quantization parameters
-    :param load_weight_quantization: whether or not the weight quantization parameters shoud
-        be laoded
+    :param load_weight_quantization: whether or not the weight quantization parameters
+        should be loaded
     """
     model_path = get_safetensors_folder(model_name_or_path)
     mapping = get_quantization_parameter_to_path_mapping(model_path)
```

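A hedged usage sketch of the behavior the reflowed docstring describes (input/output parameters are always loaded; the flag additionally loads weight parameters). The checkpoint path is a placeholder, and the import path simply mirrors the file shown above.

```python
from compressed_tensors.quantization.lifecycle.apply import (
    load_pretrained_quantization_parameters,
)

def load_all_quantization_parameters(model, checkpoint_dir: str):
    # model must already be initialized with a quantization config
    load_pretrained_quantization_parameters(
        model,
        checkpoint_dir,
        load_weight_quantization=True,  # also load weight scales / zero points
    )
```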
src/compressed_tensors/quantization/lifecycle/forward.py (4 additions, 3 deletions)

```diff
@@ -200,7 +200,8 @@ def _process_quantization(
     q_min, q_max = calculate_range(args, x.device)
     group_size = args.group_size
 
-    # blockwise FP8: quantize per 2D block, supports block_structure for static block quant
+    # blockwise FP8: quantize per 2D block, supports block_structure for static block
+    # quantization
     if args.strategy == QuantizationStrategy.BLOCK:
         original_shape = x.shape
         rows, cols = x.shape[-2], x.shape[-1]
@@ -209,8 +210,8 @@ def _process_quantization(
         # Ensure exact division (tensor dimensions must be divisible by block size)
         if rows % block_height != 0:
             raise ValueError(
-                f"Tensor height {rows} is not divisible by block_height {block_height}. "
-                f"Block quantization requires exact division."
+                f"Tensor height {rows} is not divisible by block_height {block_height}."
+                f" Block quantization requires exact division."
             )
         if cols % block_width != 0:
             raise ValueError(
```

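A small worked example of the divisibility rule these error messages enforce, with illustrative numbers:

```python
# a (256, 512) weight quantized with block_structure [128, 128] splits into a
# 2 x 4 grid of blocks; dimensions that are not multiples of the block size
# would raise the ValueError shown in the diff above
rows, cols = 256, 512
block_height, block_width = 128, 128

assert rows % block_height == 0 and cols % block_width == 0
num_blocks = (rows // block_height, cols // block_width)  # (2, 4)
```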
src/compressed_tensors/quantization/lifecycle/initialize.py (7 additions, 6 deletions)

```diff
@@ -17,7 +17,7 @@
 import math
 import warnings
 from enum import Enum
-from typing import List, Optional
+from typing import Optional
 
 import torch
 from compressed_tensors.quantization.lifecycle.forward import (
@@ -87,7 +87,6 @@ def initialize_module_for_quantization(
         _initialize_attn_scales(module)
 
     else:
-
         if scheme.input_activations is not None:
             _initialize_scale_zero_point(
                 module,
@@ -183,7 +182,8 @@ def _initialize_scale_zero_point(
             num_groups = math.ceil(weight_shape[1] / quantization_args.group_size)
             expected_shape = (weight_shape[0], max(num_groups, 1))
         elif quantization_args.strategy == QuantizationStrategy.BLOCK:
-            # For block quantization, scale shape should match number of blocks - only for weights
+            # For block quantization, scale shape should match number of blocks - only
+            # for weights
             if quantization_args.block_structure is None:
                 raise ValueError(
                     "Block quantization requires block_structure to be specified"
@@ -196,9 +196,10 @@ def _initialize_scale_zero_point(
             # Warn if dimensions don't divide evenly
            if rows % block_height != 0 or cols % block_width != 0:
                 warnings.warn(
-                    f"Block quantization: tensor shape {weight_shape} does not divide evenly "
-                    f"by block structure {quantization_args.block_structure}. "
-                    f"Some blocks will be incomplete which may affect quantization quality.",
+                    f"Block quantization: tensor shape {weight_shape} does not divide"
+                    f"evenly by block structure {quantization_args.block_structure}. "
+                    f"Some blocks will be incomplete which may affect quantization"
+                    "quality.",
                     UserWarning,
                 )
 
```

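To illustrate the scale-shape rule behind the reflowed comment (one scale entry per block, using ceiling division so a ragged edge still gets a block), with assumed example sizes:

```python
import math

weight_shape = (4096, 11000)         # illustrative weight dimensions
block_height, block_width = 128, 128

expected_scale_shape = (
    math.ceil(weight_shape[0] / block_height),  # 32
    math.ceil(weight_shape[1] / block_width),   # 86
)
# 11000 is not a multiple of 128, so the last block column is incomplete and
# the UserWarning in the diff above is emitted during initialization
```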
src/compressed_tensors/quantization/quant_args.py (9 additions, 11 deletions)

```diff
@@ -211,23 +211,21 @@ def validate_group(cls, value) -> Union[int, None]:
     def validate_block_structure(cls, value) -> Optional[List[int]]:
         if value is None:
             return value
+        invalid_block_structure_msg = (
+            f"Invalid block_structure '{value}'. Must be a list of two ints"
+            " [rows, cols]."
+        )
         # For backward compatibility, allow string format "2x4", "8x16", etc.
         if isinstance(value, str):
             try:
                 return [int(x) for x in value.split("x")]
             except Exception:
-                raise ValueError(
-                    f"Invalid block_structure '{value}'. Must be a list of two ints [rows, cols]."
-                )
+                raise ValueError(invalid_block_structure_msg)
         if isinstance(value, (list, tuple)):
             if len(value) != 2 or not all(isinstance(v, int) for v in value):
-                raise ValueError(
-                    f"Invalid block_structure '{value}'. Must be a list of two ints [rows, cols]."
-                )
+                raise ValueError(invalid_block_structure_msg)
             return list(value)
-        raise ValueError(
-            f"Invalid block_structure '{value}'. Must be a list of two ints [rows, cols]."
-        )
+        raise ValueError(invalid_block_structure_msg)
 
     @field_validator("strategy", mode="before")
     def validate_strategy(cls, value) -> Union[QuantizationStrategy, None]:
@@ -307,7 +305,7 @@ def validate_model_after(model: "QuantizationArgs") -> "QuantizationArgs":
            )
            if strategy not in supported_strategies:
                raise ValueError(
-                    f"One of {supported_strategies} must be used for dynamic quantization"
+                    f"One of {supported_strategies} must be used for dynamic quant."
                )
 
            if (
@@ -322,7 +320,7 @@ def validate_model_after(model: "QuantizationArgs") -> "QuantizationArgs":
                observer != "memoryless"
            ):  # avoid annoying users with old configs
                warnings.warn(
-                    "No observer is used for dynamic quantization, setting to None"
+                    "No observer is used for dynamic quant., setting to None"
                )
                observer = None
        else:
```

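For reference, the two block_structure spellings the consolidated validator accepts, shown with a standalone helper that mirrors its behavior (a sketch, not the validator itself):

```python
from typing import List, Union

def normalize_block_structure(value: Union[str, list, tuple]) -> List[int]:
    # legacy string format "RxC" is converted to a two-int list;
    # a [rows, cols] list or tuple passes through; anything else is rejected
    if isinstance(value, str):
        return [int(x) for x in value.split("x")]
    if isinstance(value, (list, tuple)) and len(value) == 2:
        return [int(v) for v in value]
    raise ValueError(f"Invalid block_structure '{value}'. Must be two ints [rows, cols].")

assert normalize_block_structure("128x128") == [128, 128]
assert normalize_block_structure((64, 64)) == [64, 64]
```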
src/compressed_tensors/quantization/quant_scheme.py (4 additions, 3 deletions)

```diff
@@ -72,9 +72,10 @@ def validate_model_after(model: "QuantizationScheme") -> "QuantizationScheme":
         ):
             warnings.warn(
                 "Using GROUP strategy for both weights and input_activations "
-                f"with different group sizes ({weights.group_size} vs {inputs.group_size}) "
-                "may complicate fused kernel implementations. Consider using "
-                "TENSOR_GROUP strategy for both or matching group sizes.",
+                f"with different group sizes ({weights.group_size} vs "
+                f"{inputs.group_size}) may complicate fused kernel implementations. "
+                "Consider using TENSOR_GROUP strategy for both or matching group"
+                " sizes.",
                 UserWarning,
                 stacklevel=2,
             )
```

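A hedged sketch of a scheme that would trigger the reworded warning: weights and input activations both use GROUP quantization but with different group sizes. The constructor arguments are assumptions based on the field names visible in the diff, not a verified recipe.

```python
from compressed_tensors.quantization import QuantizationArgs, QuantizationScheme

scheme = QuantizationScheme(
    targets=["Linear"],
    weights=QuantizationArgs(num_bits=4, strategy="group", group_size=128),
    input_activations=QuantizationArgs(num_bits=8, strategy="group", group_size=64),
)
# emits the UserWarning above; matching group sizes (or TENSOR_GROUP for both)
# avoids the mismatch
```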
src/compressed_tensors/utils/match.py (1 addition, 1 deletion)

````diff
@@ -110,7 +110,7 @@ def match_modules_set(
     Yields modules grouped with the same order and size as `targets`.
     Values are returned in order of `model.named_modules()`
 
-    For example, the following targets would yield module belonging to the following layers:
+    E.g. the following targets would yield module belonging to the following layers:
     ```python3
     match_modules_set(model, ["q_proj", "k_proj", "v_proj"]) == (
         (
````