Commit 2f22bef

Author: Sara Adkins
dont set quantization data on reload (#123) (#125)
1 parent a4c86dc commit 2f22bef

File tree: 2 files changed, +12 -3 lines changed

src/compressed_tensors/quantization/lifecycle/apply.py

Lines changed: 8 additions & 1 deletion

@@ -195,7 +195,14 @@ def apply_quantization_status(model: Module, status: QuantizationStatus):
         model.apply(initialize_module_for_quantization)
 
     if current_status < status >= QuantizationStatus.CALIBRATION > current_status:
-        model.apply(set_module_for_calibration)
+        # only quantize weights up front when our end goal state is calibration,
+        # weight quantization parameters are already loaded for frozen/compressed
+        quantize_weights_upfront = status == QuantizationStatus.CALIBRATION
+        model.apply(
+            lambda module: set_module_for_calibration(
+                module, quantize_weights_upfront=quantize_weights_upfront
+            )
+        )
     if current_status < status >= QuantizationStatus.FROZEN > current_status:
         model.apply(freeze_module_quantization)
 
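
For context, PyTorch's `Module.apply` walks every submodule and calls a
one-argument function on each, which is why the new keyword is threaded
through a lambda closure in the hunk above. Below is a minimal,
self-contained sketch of that pattern; `mark_for_calibration` is a
hypothetical stand-in for this library's set_module_for_calibration, not
its real code:

    import torch.nn as nn

    def mark_for_calibration(module: nn.Module, quantize_weights_upfront: bool = True) -> None:
        # hypothetical stand-in for set_module_for_calibration
        if quantize_weights_upfront and isinstance(module, nn.Linear):
            print(f"would compute weight scale/zero_point for {module}")

    model = nn.Sequential(nn.Linear(4, 4), nn.ReLU())

    # default keyword value: weights are quantized up front for every Linear
    model.apply(mark_for_calibration)

    # reload of a frozen/compressed checkpoint: bind the flag via a closure
    quantize_weights_upfront = False
    model.apply(
        lambda module: mark_for_calibration(
            module, quantize_weights_upfront=quantize_weights_upfront
        )
    )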

src/compressed_tensors/quantization/lifecycle/calibration.py

Lines changed: 4 additions & 2 deletions

@@ -28,14 +28,16 @@
 _LOGGER = logging.getLogger(__name__)
 
 
-def set_module_for_calibration(module: Module):
+def set_module_for_calibration(module: Module, quantize_weights_upfront: bool = True):
     """
     marks a layer as ready for calibration which activates observers
     to update scales and zero points on each forward pass
 
     apply to full model with `model.apply(set_module_for_calibration)`
 
     :param module: module to set for calibration
+    :param quantize_weights_upfront: whether to automatically run weight quantization at the
+        start of calibration
     """
     if not getattr(module, "quantization_scheme", None):
         # no quantization scheme nothing to do

@@ -49,7 +51,7 @@ def set_module_for_calibration(module: Module):
             "to re-calibrate a frozen module"
         )
 
-    if module.quantization_scheme.weights is not None:
+    if quantize_weights_upfront and module.quantization_scheme.weights is not None:
         # set weight scale and zero_point up front, calibration data doesn't affect it
         observer = module.weight_observer
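
The guard change above is the heart of the fix: when a model is reloaded in a
frozen or compressed state, its weight scale and zero_point already come from
the checkpoint, so recomputing them from the observer would overwrite the
loaded values. A small, runnable sketch of the guard's behavior using
plain-Python placeholder objects (hypothetical names, not the library's real
module type):

    from types import SimpleNamespace

    def calibrate_weights(module, quantize_weights_upfront: bool = True) -> None:
        # mirrors the guard above; `module` is a placeholder, not a torch Module
        scheme = getattr(module, "quantization_scheme", None)
        if scheme is None:
            return  # no quantization scheme, nothing to do
        if quantize_weights_upfront and scheme.weights is not None:
            print("computing weight scale/zero_point from the weight observer")
        else:
            print("keeping weight quantization parameters loaded from the checkpoint")

    layer = SimpleNamespace(quantization_scheme=SimpleNamespace(weights="int8"))
    calibrate_weights(layer)                                  # fresh calibration run
    calibrate_weights(layer, quantize_weights_upfront=False)  # reload of a frozen model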
