@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from abc import abstractmethod
 from typing import Tuple
 from weakref import ref

@@ -23,37 +22,24 @@
     generate_gparam,
     strategy_cdiv,
 )
-from compressed_tensors.utils import getattr_chain


-base_name_to_scheme_field = {
-    "q": "input_activations",
-    "k": "input_activations",
-    "v": "input_activations",
-    "input": "input_activations",
-    "weight": "weights",
-    "output": "output_activations",
-}
-
-
-class ObserverBase(torch.nn.Module):
-    def __init__(self, module: torch.nn.Module, base_name: str):
+class MockMinMaxObserver(torch.nn.Module):
+    def __init__(self, base_name: str, args: QuantizationArgs, module: torch.nn.Module):
         super().__init__()
         self.parent = ref(module)
         self.base_name = base_name
+        self.args = args

-        self.scheme_field = base_name_to_scheme_field[base_name]
-        self.args: QuantizationArgs = getattr_chain(
-            module, f"quantization_scheme.{self.scheme_field}"
-        )
-
-        # used for moving averages and testing
+        # used for testing
         self.min_vals = None
         self.max_vals = None

-    @abstractmethod
     def get_min_max(self, observed: torch.Tensor):
-        ...
+        min_vals = torch.amin(observed, dim=(0, -1))
+        max_vals = torch.amax(observed, dim=(0, -1))
+
+        return min_vals, max_vals

     def forward(self, observed: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
         observed = flatten_for_quantization(observed, self.base_name, self.args)
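Note for readers of this hunk: the reworked constructor now receives its QuantizationArgs explicitly instead of resolving them from the parent module's quantization_scheme via getattr_chain. A minimal construction sketch under that reading, assuming compressed_tensors.quantization.QuantizationArgs with default field values (the defaults and the Linear module here are illustrative, not taken from this diff):

import torch
from compressed_tensors.quantization import QuantizationArgs

# Hypothetical setup: any torch module can act as the observed parent.
linear = torch.nn.Linear(64, 64)
weight_args = QuantizationArgs()  # assumed defaults; real tests may set strategy/group_size
observer = MockMinMaxObserver("weight", weight_args, linear)  # args passed in, no scheme lookup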
@@ -71,46 +57,12 @@ def forward(self, observed: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:

     def get_global_scale(self, observed: torch.Tensor):
         observed = observed.reshape((1, 1, -1))  # per tensor reshape
-
         min_vals, max_vals = self.get_min_max(observed)
-
         global_scale = generate_gparam(min_vals, max_vals)

         return global_scale


-class MockMinMaxObserver(ObserverBase):
-    def __init__(self, module: torch.nn.Module, base_name: str):
-        super().__init__(module, base_name)
-
-    def get_min_max(self, observed: torch.Tensor):
-        min_vals = torch.amin(observed, dim=(0, -1))
-        max_vals = torch.amax(observed, dim=(0, -1))
-
-        return min_vals, max_vals
-
-
-class MockMovingMinMaxObserver(ObserverBase):
-    def __init__(self, module: torch.nn.Module, base_name: str):
-        super().__init__(module, base_name)
-
-        self.averaging_constant = self.args.observer_kwargs.get(
-            "averaging_constant", 0.01
-        )
-
-    def get_min_max(self, observed: torch.Tensor):
-        min_vals = torch.amin(observed, dim=(0, -1))
-        max_vals = torch.amax(observed, dim=(0, -1))
-
-        if self.min_vals is not None:
-            # FUTURE: consider scaling by num observations (first dim)
-            # rather than reducing by first dim
-            min_vals = torch.lerp(self.min_vals, min_vals, self.averaging_constant)
-            max_vals = torch.lerp(self.max_vals, max_vals, self.averaging_constant)
-
-        return min_vals, max_vals
-
-
 def flatten_for_quantization(
     value: torch.Tensor, base_name: str, args: QuantizationArgs
 ) -> torch.Tensor:
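For context on the newly inlined get_min_max: it reduces over the observation dimension (0) and the innermost dimension (-1) of the flattened input, leaving one min/max value per remaining group. A standalone sketch of just that reduction, using a made-up (num_observations, num_groups, group_size) layout (the exact shape produced by flatten_for_quantization is not shown in this section):

import torch

# Hypothetical flattened tensor: 4 observations, 8 groups, 16 elements per group.
observed = torch.randn(4, 8, 16)
min_vals = torch.amin(observed, dim=(0, -1))  # shape (8,): one min per group
max_vals = torch.amax(observed, dim=(0, -1))  # shape (8,): one max per group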