@@ -206,34 +206,33 @@ def process_kv_cache_config(
     return config
 
 
-def apply_quantization_status(model: Module, status: QuantizationStatus):
+def apply_quantization_status(module: Module, status: QuantizationStatus):
     """
     Applies in place the quantization lifecycle up to the given status
 
-    :param model: model to apply quantization to
+    :param module: module to apply quantization to
     :param status: status to update the module to
     """
 
-    if status >= QuantizationStatus.INITIALIZED:
-        force_zero_point_init = status != QuantizationStatus.COMPRESSED
-
-        # When decompressing, we set the scale_dtype as the model's dtype
-        # This is because the normal workflow of using the weight's dtype
-        # will be incorrect as the model weight will be compressed
-        # Therefore, use the dtype set by the user using the PretrainedModel
-        scale_dtype = None
-        if status == QuantizationStatus.FROZEN:
-            if hasattr(model, "dtype"):
-                scale_dtype = model.dtype
-
-        model.apply(
-            lambda module: initialize_module_for_quantization(
-                module, force_zero_point=force_zero_point_init, scale_dtype=scale_dtype
-            )
+    force_zero_point_init = status != QuantizationStatus.COMPRESSED
+
+    # When decompressing, we set the scale_dtype as the model's dtype
+    # This is because the normal workflow of using the weight's dtype
+    # will be incorrect as the model weight will be compressed
+    # Therefore, use the dtype set by the user using the PretrainedModel
+    scale_dtype = None
+    if status == QuantizationStatus.FROZEN:
+        if hasattr(module, "dtype"):
+            scale_dtype = module.dtype
+
+    module.apply(
+        lambda module: initialize_module_for_quantization(
+            module, force_zero_point=force_zero_point_init, scale_dtype=scale_dtype
         )
+    )
 
     if status >= QuantizationStatus.COMPRESSED:
-        model.apply(compress_quantized_weights)
+        module.apply(compress_quantized_weights)
 
 
 @deprecated(
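The hunk above renames the parameter from model to module, so the lifecycle helper can be applied to any torch Module (a single layer or submodule, not only a full model), and it lifts the initialization block out of the status >= INITIALIZED guard so it always runs. Below is a minimal usage sketch, not taken from this diff: the import paths and the Linear layer are assumptions for illustration, and in practice the target modules would typically already have a quantization scheme attached (for example via apply_quantization_config) before this call has any effect.

# Minimal usage sketch (assumed import paths; illustrative only).
import torch
from compressed_tensors.quantization import (
    QuantizationStatus,
    apply_quantization_status,
)

# With the rename, the first argument is any torch.nn.Module, e.g. a single
# layer rather than a whole model. This Linear layer is a hypothetical example.
layer = torch.nn.Linear(16, 16)

# Per the diff above: applies initialize_module_for_quantization to every
# submodule, then compresses quantized weights once the status reaches
# COMPRESSED or later.
apply_quantization_status(layer, QuantizationStatus.INITIALIZED)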