Skip to content

Commit 2ce9775

Browse files
cleanup
Signed-off-by: Brian Dellabetta <[email protected]>
1 parent 550c0ad commit 2ce9775

File tree

1 file changed

+32
-31
lines changed
  • src/compressed_tensors/quantization/lifecycle

1 file changed

+32
-31
lines changed

src/compressed_tensors/quantization/lifecycle/apply.py

Lines changed: 32 additions & 31 deletions
Original file line number · Diff line number · Diff line change
@@ -146,38 +146,39 @@ def apply_quantization_config(
146146
for target in scheme.targets:
147147
target_to_scheme[target] = scheme
148148

149-
# mark appropriate layers for quantization by setting their quantization schemes
150-
for name, submodule in match_named_modules(
151-
model, scheme.targets, config.ignore, warn_on_fail=True
149+
# mark appropriate layers for quantization by setting their quantization schemes
150+
for name, submodule in match_named_modules(
151+
model, target_to_scheme, config.ignore, warn_on_fail=True
152+
):
153+
# potentially fix module name to remove FSDP wrapper prefix
154+
name = fix_fsdp_module_name(name)
155+
156+
# mark modules to be quantized by adding
157+
# quant scheme to the matching layers
158+
scheme = _scheme_from_targets(target_to_scheme, scheme.targets, name)
159+
if (
160+
run_compressed
161+
and config.format != CompressionFormat.dense.value
162+
and isinstance(submodule, torch.nn.Linear)
152163
):
153-
# potentially fix module name to remove FSDP wrapper prefix
154-
name = fix_fsdp_module_name(name)
155-
156-
# mark modules to be quantized by adding
157-
# quant scheme to the matching layers
158-
scheme = _scheme_from_targets(target_to_scheme, scheme.targets, name)
159-
if run_compressed:
160-
format = config.format
161-
if format != CompressionFormat.dense.value:
162-
if isinstance(submodule, torch.nn.Linear):
163-
from compressed_tensors.linear.compressed_linear import (
164-
CompressedLinear,
165-
)
166-
167-
compressed_linear = CompressedLinear.from_linear(
168-
submodule,
169-
quantization_scheme=scheme,
170-
quantization_format=format,
171-
)
172-
replace_module(model, name, compressed_linear)
173-
174-
# target matched - add layer and scheme to target list
175-
submodule.quantization_scheme = scheme
176-
177-
names_to_scheme[name] = submodule.quantization_scheme
178-
179-
# apply current quantization status to each targeted submodule
180-
apply_quantization_status(submodule, config.quantization_status)
164+
from compressed_tensors.linear.compressed_linear import (
165+
CompressedLinear,
166+
)
167+
168+
compressed_linear = CompressedLinear.from_linear(
169+
submodule,
170+
quantization_scheme=scheme,
171+
quantization_format=config.format,
172+
)
173+
replace_module(model, name, compressed_linear)
174+
175+
# target matched - add layer and scheme to target list
176+
submodule.quantization_scheme = scheme
177+
178+
names_to_scheme[name] = submodule.quantization_scheme
179+
180+
# apply current quantization status to each targeted submodule
181+
apply_quantization_status(submodule, config.quantization_status)
181182

182183
# TODO warn on ignore not being found, this is useful in debugging
183184
# if config.ignore is not None and ignored_submodules is not None:

0 commit comments

Comments (0)