3 files changed: +7 -1 lines changed

@@ -271,6 +271,9 @@ def compress(
     v_proj_has_quant_output = 0
     for name, module in model.named_modules():
         if not hasattr(module, "quantization_scheme"):
+            # We still want to count non-quantized q_proj
+            if name.endswith(".q_proj"):
+                q_proj_has_no_quant_output += 1
             continue
         out_act = module.quantization_scheme.output_activations
         if name.endswith(".q_proj") and out_act is None:
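Note on the hunk above: before this change, any module without a quantization_scheme hit the continue immediately, so unquantized q_proj layers were never counted. A minimal, runnable sketch of the counting pattern follows; the toy model is hypothetical and only the module names matter:

    import torch.nn as nn

    # Hypothetical stand-in for a transformer block; plain nn.Linear modules
    # carry no quantization_scheme attribute, so they take the early-continue path.
    model = nn.ModuleDict(
        {"attn": nn.ModuleDict({"q_proj": nn.Linear(8, 8), "v_proj": nn.Linear(8, 8)})}
    )

    q_proj_has_no_quant_output = 0
    for name, module in model.named_modules():
        if not hasattr(module, "quantization_scheme"):
            # Count the non-quantized q_proj before skipping to the next module
            if name.endswith(".q_proj"):
                q_proj_has_no_quant_output += 1
            continue

    print(q_proj_has_no_quant_output)  # 1 -- only "attn.q_proj" matches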
@@ -110,6 +110,7 @@ def is_preset_scheme(name: str) -> bool:
     """
     return name.upper() in PRESET_SCHEMES
 
+UNQUANTIZED = dict()
 
 # 8 bit integer weights and 8 bit activations quantization
 W8A8 = dict(
@@ -208,6 +209,8 @@ def is_preset_scheme(name: str) -> bool:
 )
 
 PRESET_SCHEMES = {
+    # Unquantized (no-op)
+    "UNQUANTIZED": UNQUANTIZED,
     # Integer weight only schemes
     "W8A16": W8A16,
     "W4A16": W4A16,
@@ -181,7 +181,7 @@ def calculate_compression_ratio(model: Module) -> float:
         for parameter in model.parameters():
             uncompressed_bits = get_torch_bit_depth(parameter)
             compressed_bits = uncompressed_bits
-            if is_module_quantized(submodule):
+            if is_module_quantized(submodule) and submodule.quantization_scheme.weights:
                 compressed_bits = submodule.quantization_scheme.weights.num_bits
 
             num_weights = parameter.numel()
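The added weights check matters once an UNQUANTIZED (or activation-only) scheme is attached: the module then carries a quantization_scheme whose weights field is empty, and dereferencing .num_bits on it would raise an AttributeError. A minimal sketch of the guarded branch, with SimpleNamespace standing in for the real scheme objects:

    from types import SimpleNamespace

    def compressed_weight_bits(submodule, uncompressed_bits: int = 32) -> int:
        # Mirrors the patched branch: trust num_bits only when a weight
        # quantization config is actually present.
        scheme = getattr(submodule, "quantization_scheme", None)
        if scheme is not None and scheme.weights:
            return scheme.weights.num_bits
        return uncompressed_bits

    weight_quantized = SimpleNamespace(
        quantization_scheme=SimpleNamespace(weights=SimpleNamespace(num_bits=4))
    )
    activation_only = SimpleNamespace(quantization_scheme=SimpleNamespace(weights=None))

    print(compressed_weight_bits(weight_quantized))  # 4
    print(compressed_weight_bits(activation_only))   # 32, no AttributeError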