Skip to content

Commit 2b79056

Browse files
authored
Observer Restructure: Remove Observers, calibration, and applying frozen steps from lifecycle (#189)
* temporary workaround * separate out calibration from forward pass * fix missing import * fix tests * update all other tests * clean * update * clean-up * fix test case * remove calibration and init observer steps * update * update * clean-up/fix * cleanup * cleanup * remove cache * clean-up * remove frozen * more clean-up * remove observer, cache, and frozen state * update more test cases * fix bit_depth test * fix more tests * clean-up remaining tests * clean-up * dont skip * more clean-up * fix
1 parent 13b5c0b commit 2b79056

33 files changed

+420
-1763
lines changed

src/compressed_tensors/quantization/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,3 @@
1919
from .quant_config import *
2020
from .quant_scheme import *
2121
from .lifecycle import *
22-
from .cache import QuantizedKVParameterCache

src/compressed_tensors/quantization/cache.py

Lines changed: 0 additions & 200 deletions
This file was deleted.

src/compressed_tensors/quantization/lifecycle/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,7 @@
1515
# flake8: noqa
1616
# isort: skip_file
1717

18-
from .calibration import *
1918
from .forward import *
20-
from .frozen import *
2119
from .initialize import *
2220
from .compressed import *
2321
from .apply import *

src/compressed_tensors/quantization/lifecycle/apply.py

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,9 @@
2222

2323
import torch
2424
from compressed_tensors.config import CompressionFormat
25-
from compressed_tensors.quantization.lifecycle.calibration import (
26-
set_module_for_calibration,
27-
)
2825
from compressed_tensors.quantization.lifecycle.compressed import (
2926
compress_quantized_weights,
3027
)
31-
from compressed_tensors.quantization.lifecycle.frozen import freeze_module_quantization
3228
from compressed_tensors.quantization.lifecycle.initialize import (
3329
initialize_module_for_quantization,
3430
)
@@ -233,6 +229,7 @@ def apply_quantization_status(model: Module, status: QuantizationStatus):
233229
:param model: model to apply quantization to
234230
:param status: status to update the module to
235231
"""
232+
236233
current_status = infer_quantization_status(model)
237234

238235
if status >= QuantizationStatus.INITIALIZED > current_status:
@@ -243,18 +240,6 @@ def apply_quantization_status(model: Module, status: QuantizationStatus):
243240
)
244241
)
245242

246-
if current_status < status >= QuantizationStatus.CALIBRATION > current_status:
247-
# only quantize weights up front when our end goal state is calibration,
248-
# weight quantization parameters are already loaded for frozen/compressed
249-
quantize_weights_upfront = status == QuantizationStatus.CALIBRATION
250-
model.apply(
251-
lambda module: set_module_for_calibration(
252-
module, quantize_weights_upfront=quantize_weights_upfront
253-
)
254-
)
255-
if current_status < status >= QuantizationStatus.FROZEN > current_status:
256-
model.apply(freeze_module_quantization)
257-
258243
if current_status < status >= QuantizationStatus.COMPRESSED > current_status:
259244
model.apply(compress_quantized_weights)
260245

src/compressed_tensors/quantization/lifecycle/calibration.py

Lines changed: 0 additions & 80 deletions
This file was deleted.

0 commit comments

Comments (0)