     QuantizationArgs,
     QuantizationScheme,
     initialize_module_for_quantization,
+    QuantizationStatus,
 )
 
-from llmcompressor.modifiers.quantization.calibration import initialize_observer
+from llmcompressor.modifiers.quantization.calibration import (
+    initialize_observer,
+    update_weight_zp_scale,
+    update_weight_global_scale,
+    calibrate_input_hook,
+)
 
 
 @pytest.mark.parametrize(
@@ -59,3 +65,273 @@ def test_observers_update(shape, group_size, actorder):
 def assert_alike(a, b):
     assert a.dtype == b.dtype
     assert a.shape == b.shape
+
+
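+# The expected min/max values below assume calibration data holding the values
+# 0..23 in a 4x6 matrix (built in each test body): "tensor" observes the full
+# range, "channel"/"token" observe one row each ([0..5], [6..11], [12..17],
+# [18..23]), "group"/"tensor_group" with group_size=3 split each row into two
+# halves, and "block" with block_structure=[2, 3] tiles the matrix into four
+# 2x3 blocks.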
+@pytest.mark.parametrize(
+    "args,exp_min_val,exp_max_val,exp_tol",
+    [
+        (
+            QuantizationArgs(
+                num_bits=4,
+                type="int",
+                symmetric=True,
+                strategy="tensor",
+                observer="minmax",
+            ),
+            {"default": torch.tensor(0.0, dtype=torch.bfloat16)},
+            {"default": torch.tensor(23.0, dtype=torch.bfloat16)},
+            2.5,
+        ),
+        (
+            QuantizationArgs(
+                num_bits=4,
+                type="int",
+                symmetric=True,
+                strategy="channel",
+                observer="minmax",
+            ),
+            {"default": torch.tensor([[0], [6], [12], [18]], dtype=torch.bfloat16)},
+            {"default": torch.tensor([[5], [11], [17], [23]], dtype=torch.bfloat16)},
+            2.5,
+        ),
+        (
+            QuantizationArgs(
+                num_bits=4,
+                type="int",
+                symmetric=True,
+                strategy="group",
+                group_size=3,
+                observer="minmax",
+            ),
+            {
+                "default": torch.tensor([[0], [6], [12], [18]], dtype=torch.bfloat16),
+                1: torch.tensor([[3], [9], [15], [21]], dtype=torch.bfloat16),
+            },
+            {
+                "default": torch.tensor([[2], [8], [14], [20]], dtype=torch.bfloat16),
+                1: torch.tensor([[5], [11], [17], [23]], dtype=torch.bfloat16),
+            },
+            2.5,
+        ),
+        (
+            QuantizationArgs(
+                num_bits=4,
+                type="float",
+                symmetric=True,
+                strategy="tensor_group",
+                group_size=3,
+                observer="minmax",
+            ),
+            {
+                "default": torch.tensor([[0], [6], [12], [18]], dtype=torch.bfloat16),
+                1: torch.tensor([[3], [9], [15], [21]], dtype=torch.bfloat16),
+            },
+            {
+                "default": torch.tensor([[2], [8], [14], [20]], dtype=torch.bfloat16),
+                1: torch.tensor([[5], [11], [17], [23]], dtype=torch.bfloat16),
+            },
+            5.0,
+        ),
+        (
+            QuantizationArgs(
+                num_bits=4,
+                type="int",
+                symmetric=True,
+                strategy="block",
+                block_structure=[2, 3],
+                observer="minmax",
+            ),
+            {
+                "block_0_0": torch.tensor([[0]], dtype=torch.bfloat16),
+                "block_0_1": torch.tensor([[3]], dtype=torch.bfloat16),
+                "block_1_0": torch.tensor([[12]], dtype=torch.bfloat16),
+                "block_1_1": torch.tensor([[15]], dtype=torch.bfloat16),
+            },
+            {
+                "block_0_0": torch.tensor([[8]], dtype=torch.bfloat16),
+                "block_0_1": torch.tensor([[11]], dtype=torch.bfloat16),
+                "block_1_0": torch.tensor([[20]], dtype=torch.bfloat16),
+                "block_1_1": torch.tensor([[23]], dtype=torch.bfloat16),
+            },
+            2.5,
+        ),
+    ],
+)
+def test_weight_quantization(args, exp_min_val, exp_max_val, exp_tol):
+    # set up weight
+    input_size, output_size = 6, 4
+    linear = torch.nn.Linear(input_size, output_size, bias=False)
+    linear.weight.data = torch.arange(
+        input_size * output_size, dtype=torch.bfloat16
+    ).reshape(output_size, input_size)
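+    # weight rows are [0..5], [6..11], [12..17], [18..23], matching the
+    # expected observer ranges in the parametrize cases above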
+
+    # initialize quantization parameters
+    scheme = QuantizationScheme(targets=[], weights=args)
+    initialize_module_for_quantization(linear, scheme)
+    assert getattr(linear, "quantization_scheme") is scheme
+
+    # calibrate quantization parameters
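+    # initialize_observer attaches a minmax observer as `weight_observer`;
+    # update_weight_zp_scale then folds the observed ranges into the module's
+    # scale and zero-point parameters (update_weight_global_scale only takes
+    # effect for tensor_group schemes, which also calibrate a global scale)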
+    initialize_observer(linear, "weight")
+    update_weight_global_scale(linear)
+    update_weight_zp_scale(linear)
+
+    observer = getattr(linear, "weight_observer")
+    assert (
+        observer.min_val.keys()
+        == observer.max_val.keys()
+        == exp_min_val.keys()
+        == exp_max_val.keys()
+    )
+    for key in observer.min_val.keys():
+        assert torch.equal(observer.min_val[key], exp_min_val[key])
+        assert torch.equal(observer.max_val[key], exp_max_val[key])
+
+    # forward pass
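+    # atol is a loose empirical bound on the weight fake-quantization error:
+    # with 4 bits the quantization step is roughly max_abs / 8 (about 3 for
+    # the per-tensor case), so each weight moves by at most half a step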
+    input = torch.rand((1, input_size), dtype=torch.bfloat16)
+    output = linear(input)
+    true_output = input @ linear.weight.T
+    assert torch.allclose(output, true_output, atol=exp_tol)
+
+
+@pytest.mark.parametrize(
+    "args,exp_min_val,exp_max_val,exp_tol",
+    [
+        (
+            QuantizationArgs(
+                num_bits=4,
+                type="int",
+                symmetric=True,
+                strategy="tensor",
+                observer="minmax",
+            ),
+            {"default": torch.tensor(0.0, dtype=torch.bfloat16)},
+            {"default": torch.tensor(23.0, dtype=torch.bfloat16)},
+            2.5,
+        ),
+        (
+            QuantizationArgs(
+                num_bits=4,
+                type="int",
+                symmetric=True,
+                strategy="token",
+                observer="minmax",
+            ),
+            {"default": torch.tensor([[0], [6], [12], [18]], dtype=torch.bfloat16)},
+            {"default": torch.tensor([[5], [11], [17], [23]], dtype=torch.bfloat16)},
+            2.5,
+        ),
+        (
+            QuantizationArgs(
+                num_bits=4,
+                type="int",
+                symmetric=True,
+                strategy="channel",
+                observer="minmax",
+            ),
+            {
+                "default": torch.tensor([[0], [6], [12], [18]], dtype=torch.bfloat16),
+                1: torch.tensor([[3], [9], [15], [21]], dtype=torch.bfloat16),
+            },
+            {
+                "default": torch.tensor([[2], [8], [14], [20]], dtype=torch.bfloat16),
+                1: torch.tensor([[5], [11], [17], [23]], dtype=torch.bfloat16),
+            },
+            2.5,
+        ),
+        (
+            QuantizationArgs(
+                num_bits=4,
+                type="int",
+                symmetric=True,
+                strategy="group",
+                group_size=3,
+                observer="minmax",
+            ),
+            {
+                "default": torch.tensor([[0], [6], [12], [18]], dtype=torch.bfloat16),
+                1: torch.tensor([[3], [9], [15], [21]], dtype=torch.bfloat16),
+            },
+            {
+                "default": torch.tensor([[2], [8], [14], [20]], dtype=torch.bfloat16),
+                1: torch.tensor([[5], [11], [17], [23]], dtype=torch.bfloat16),
+            },
+            2.5,
+        ),
+        (
+            QuantizationArgs(
+                num_bits=4,
+                type="float",
+                symmetric=True,
+                strategy="tensor_group",
+                group_size=3,
+                observer="minmax",
+            ),
+            {
+                "default": torch.tensor([[0], [6], [12], [18]], dtype=torch.bfloat16),
+                1: torch.tensor([[3], [9], [15], [21]], dtype=torch.bfloat16),
+            },
+            {
+                "default": torch.tensor([[2], [8], [14], [20]], dtype=torch.bfloat16),
+                1: torch.tensor([[5], [11], [17], [23]], dtype=torch.bfloat16),
+            },
+            2.5,
+        ),
+        # (
+        #     QuantizationArgs(
+        #         num_bits=4,
+        #         type="int",
+        #         symmetric=True,
+        #         strategy="block",
+        #         block_structure=[2, 3],
+        #         observer="minmax",
+        #     ),
+        #     {
+        #         "block_0_0": torch.tensor([[0]], dtype=torch.bfloat16),
+        #         "block_0_1": torch.tensor([[3]], dtype=torch.bfloat16),
+        #         "block_1_0": torch.tensor([[12]], dtype=torch.bfloat16),
+        #         "block_1_1": torch.tensor([[15]], dtype=torch.bfloat16),
+        #     },
+        #     {
+        #         "block_0_0": torch.tensor([[8]], dtype=torch.bfloat16),
+        #         "block_0_1": torch.tensor([[11]], dtype=torch.bfloat16),
+        #         "block_1_0": torch.tensor([[20]], dtype=torch.bfloat16),
+        #         "block_1_1": torch.tensor([[23]], dtype=torch.bfloat16),
+        #     },
+        #     2.5,
+        # ),
+    ],
+)
+def test_activation_quantization(args, exp_min_val, exp_max_val, exp_tol):
+    # set up activation (and identity weight); four "tokens" of six features
+    # cover the values 0..23 assumed by the expected observer ranges above
+    input_size = 6
+    input = torch.arange(4 * input_size, dtype=torch.bfloat16).reshape(4, input_size)
+    linear = torch.nn.Linear(input_size, input_size, bias=False)
+    linear.weight.data = torch.eye(input_size, dtype=torch.bfloat16)
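+    # with an identity weight, any deviation of the output from the input is
+    # due solely to activation fake quantization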
+
+    # initialize quantization parameters
+    scheme = QuantizationScheme(targets=[], input_activations=args)
+    initialize_module_for_quantization(linear, scheme)
+    assert getattr(linear, "quantization_scheme") is scheme
+
+    # calibrate quantization parameters
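+    # calibrate_input_hook runs as a forward pre-hook, routing each incoming
+    # activation through the observer before the linear op executes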
+    initialize_observer(linear, "input")
+    linear.register_forward_pre_hook(calibrate_input_hook)
+    linear(input)  # trigger the hook so the observer records min/max values
+
+    observer = getattr(linear, "input_observer")
+    assert (
+        observer.min_val.keys()
+        == observer.max_val.keys()
+        == exp_min_val.keys()
+        == exp_max_val.keys()
+    )
+    for key in observer.min_val.keys():
+        assert torch.equal(observer.min_val[key], exp_min_val[key])
+        assert torch.equal(observer.max_val[key], exp_max_val[key])
+
+    # forward pass
+    linear.quantization_status = QuantizationStatus.FROZEN
+    output = linear(input)
+    true_output = input  # identity weight: input @ linear.weight.T == input
+    assert torch.allclose(output, true_output, atol=exp_tol)