@@ -146,38 +146,39 @@ def apply_quantization_config(
146
146
for target in scheme .targets :
147
147
target_to_scheme [target ] = scheme
148
148
149
- # mark appropriate layers for quantization by setting their quantization schemes
150
- for name , submodule in match_named_modules (
151
- model , scheme .targets , config .ignore , warn_on_fail = True
149
+ # mark appropriate layers for quantization by setting their quantization schemes
150
+ for name , submodule in match_named_modules (
151
+ model , target_to_scheme , config .ignore , warn_on_fail = True
152
+ ):
153
+ # potentially fix module name to remove FSDP wrapper prefix
154
+ name = fix_fsdp_module_name (name )
155
+
156
+ # mark modules to be quantized by adding
157
+ # quant scheme to the matching layers
158
+ scheme = _scheme_from_targets (target_to_scheme , scheme .targets , name )
159
+ if (
160
+ run_compressed
161
+ and config .format != CompressionFormat .dense .value
162
+ and isinstance (submodule , torch .nn .Linear )
152
163
):
153
- # potentially fix module name to remove FSDP wrapper prefix
154
- name = fix_fsdp_module_name (name )
155
-
156
- # mark modules to be quantized by adding
157
- # quant scheme to the matching layers
158
- scheme = _scheme_from_targets (target_to_scheme , scheme .targets , name )
159
- if run_compressed :
160
- format = config .format
161
- if format != CompressionFormat .dense .value :
162
- if isinstance (submodule , torch .nn .Linear ):
163
- from compressed_tensors .linear .compressed_linear import (
164
- CompressedLinear ,
165
- )
166
-
167
- compressed_linear = CompressedLinear .from_linear (
168
- submodule ,
169
- quantization_scheme = scheme ,
170
- quantization_format = format ,
171
- )
172
- replace_module (model , name , compressed_linear )
173
-
174
- # target matched - add layer and scheme to target list
175
- submodule .quantization_scheme = scheme
176
-
177
- names_to_scheme [name ] = submodule .quantization_scheme
178
-
179
- # apply current quantization status to each targeted submodule
180
- apply_quantization_status (submodule , config .quantization_status )
164
+ from compressed_tensors .linear .compressed_linear import (
165
+ CompressedLinear ,
166
+ )
167
+
168
+ compressed_linear = CompressedLinear .from_linear (
169
+ submodule ,
170
+ quantization_scheme = scheme ,
171
+ quantization_format = config .format ,
172
+ )
173
+ replace_module (model , name , compressed_linear )
174
+
175
+ # target matched - add layer and scheme to target list
176
+ submodule .quantization_scheme = scheme
177
+
178
+ names_to_scheme [name ] = submodule .quantization_scheme
179
+
180
+ # apply current quantization status to each targeted submodule
181
+ apply_quantization_status (submodule , config .quantization_status )
181
182
182
183
# TODO warn on ignore not being found, this is useful in debugging
183
184
# if config.ignore is not None and ignored_submodules is not None:
0 commit comments