revert global disjointness

kylesayrs · kylesayrs · commit dd9132977960 · 2025-09-18T07:11:29.000-04:00
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
diff --git a/src/llmcompressor/modifiers/quantization/calibration.py b/src/llmcompressor/modifiers/quantization/calibration.py
@@ -147,6 +147,7 @@ def update_weight_global_scale(module: Module):
         should_calculate_gparam=True,
         should_calculate_qparams=False,
     )
+    module.weight_observer.reset()
 
 
 def update_weight_zp_scale(module: Module):
diff --git a/src/llmcompressor/observers/base.py b/src/llmcompressor/observers/base.py
@@ -51,8 +51,12 @@ def forward(
         :return: tuple of scale and zero point based on last observed value
         """
         self.record_observed_tokens(observed)
+
         if should_calculate_gparam:
+            # NOTE: this function updates running min/max values, which leads to
+            # running values updating twice
             return self.get_gparam(observed=observed)
+        
         return self.get_qparams(
             observed=observed,
             g_idx=g_idx,
diff --git a/src/llmcompressor/observers/min_max.py b/src/llmcompressor/observers/min_max.py
@@ -3,7 +3,7 @@
 import torch
 from compressed_tensors.quantization.quant_args import QuantizationArgs
 from compressed_tensors.quantization.utils import calculate_qparams, generate_gparam
-from compressed_tensors.utils import deprecated, patch_attr
+from compressed_tensors.utils import deprecated
 
 from llmcompressor.observers.base import Observer
 
@@ -87,12 +87,11 @@ def calculate_gparam(self, observed: torch.Tensor) -> torch.Tensor:
         :param observed: observed tensor to calculate quantization parameters for
         :return: updated global scale derived from the observed tensor
         """
-
-        # patch to avoid affecting running means
-        with patch_attr(self, "min_val", {}), patch_attr(self, "max_val", {}):
-            updated_min_val, updated_max_val = self.calculate_updated_min_max(
-                observed=observed
-            )
+        # NOTE: this function updates running min/max values, which leads to
+        # running values updating twice
+        updated_min_val, updated_max_val = self.calculate_updated_min_max(
+            observed=observed
+        )
         return generate_gparam(
             updated_min_val=updated_min_val, updated_max_val=updated_max_val
         )
diff --git a/tests/llmcompressor/conftest.py b/tests/llmcompressor/conftest.py
@@ -61,7 +61,7 @@ def check_for_created_files():
         f"Created files: {set(end_files_root) - set(start_files_root)}"
     )
 
-    max_allowed_sized_temp_files_megabytes = 1.5
+    max_allowed_sized_temp_files_megabytes = 1
     end_files_temp = _get_files(directory=tempfile.gettempdir())
     created_temp_files = set(end_files_temp) - set(start_files_temp)
     # pytest temp files are automatically deleted, exclude from size calculation

Original file line number	Diff line number	Diff line change
`@@ -147,6 +147,7 @@ def update_weight_global_scale(module: Module):`
`147`	`147`	`should_calculate_gparam=True,`
`148`	`148`	`should_calculate_qparams=False,`
`149`	`149`	`)`
	`150`	`+ module.weight_observer.reset()`
`150`	`151`
`151`	`152`
`152`	`153`	`def update_weight_zp_scale(module: Module):`
Original file line number	Diff line number	Diff line change
`@@ -61,7 +61,7 @@ def check_for_created_files():`
`61`	`61`	`f"Created files: {set(end_files_root) - set(start_files_root)}"`
`62`	`62`	`)`
`63`	`63`
`64`		`- max_allowed_sized_temp_files_megabytes = 1.5`
	`64`	`+ max_allowed_sized_temp_files_megabytes = 1`
`65`	`65`	`end_files_temp = _get_files(directory=tempfile.gettempdir())`
`66`	`66`	`created_temp_files = set(end_files_temp) - set(start_files_temp)`
`67`	`67`	`# pytest temp files are automatically deleted, exclude from size calculation`