Fix review comments and add an additional test file

amitsrivastava78 · amitsrivastava78 · commit 8cef1c7c456c · 2025-08-08T14:05:09.000+05:30
diff --git a/keras/src/models/model_test.py b/keras/src/models/model_test.py
@@ -1298,7 +1298,7 @@ def _run_gptq_test_on_dataset(self, dataset):
         test on a given dataset."""
 
         model = _get_model_with_dense_attention()
-
+        rng = np.random.default_rng(seed=42)
         # 1. Common setup
         NUM_SAMPLES = 16
         SEQUENCE_LENGTH = 128
@@ -1328,25 +1328,26 @@ def _run_gptq_test_on_dataset(self, dataset):
             wbits=W_BITS,
             nsamples=NUM_SAMPLES,
             seqlen=SEQUENCE_LENGTH,
-            groupsize=GROUP_SIZE,
+            group_size=GROUP_SIZE,
         )
         model.quantize("gptq", quant_config=gptq_config)
 
         #  4. Assertions and verification
         quantized_weights = target_layer.kernel.numpy()
 
         # Assert that the weights have been changed
-        self.assertFalse(
-            np.allclose(original_weights, quantized_weights),
-            "Weights were not changed by the GPTQ process for "
+        self.assertNotAllClose(
+            original_weights,
+            quantized_weights,
+            msg="Weights were not changed by the GPTQ process for "
             "dataset: {dataset}",
         )
 
         # Verify the quantized model can still make a prediction
-        dummy_input = np.random.randint(
-            0, VOCAB_SIZE, size=(1, SEQUENCE_LENGTH)
+        dummy_sample = rng.integers(
+            low=0, high=VOCAB_SIZE, size=(1, SEQUENCE_LENGTH)
         )
-        _ = model.predict(dummy_input)
+        _ = model.predict(dummy_sample)
 
     def test_quantize_gptq_on_different_datasets(self):
         """Tests GPTQ with various dataset types (string list, generator)."""
diff --git a/keras/src/quantizers/gptq.py b/keras/src/quantizers/gptq.py
@@ -7,7 +7,6 @@
 class GPTQ:
     def __init__(self, layer):
         self.original_layer = layer
-        self.kernel_shape = layer.kernel.shape
         self.nsamples = 0
         self.quantizer = None
 
@@ -16,13 +15,15 @@ def __init__(self, layer):
             isinstance(layer, EinsumDense) and layer.kernel.ndim == 2
         ):
             # For a standard Dense layer, the dimensions are straightforward.
+            self.kernel_shape = layer.kernel.shape
             self.rows = self.kernel_shape[0]  # Input features
             self.columns = self.kernel_shape[1]  # Output features
             self.layer = layer  # The layer itself can be used directly.
 
         # Handle 3D EinsumDense layers (typically from attention blocks).
         elif isinstance(layer, EinsumDense) and layer.kernel.ndim == 3:
             # For EinsumDense, we determine the effective 2D dimensions.
+            self.kernel_shape = layer.kernel.shape
             shape = list(self.kernel_shape)
             try:
                 d_model_dim_index = shape.index(max(shape))
@@ -53,10 +54,7 @@ def __init__(self, layer):
 
         else:
             # Raise an error if the layer is not supported.
-            raise TypeError(
-                f"Unsupported layer type or kernel shape for GPTQ: "
-                f"{type(layer)} with kernel ndim {layer.kernel.ndim}"
-            )
+            raise TypeError(f"Unsupported layer type for GPTQ: {type(layer)}")
         self.H = ops.zeros((self.rows, self.rows), dtype="float32")
 
     def update_hessian_with_batch(self, inp):
@@ -81,6 +79,17 @@ def update_hessian_with_batch(self, inp):
                 not match the dimensions of the pre-initialized Hessian matrix
                 `self.H`.
         """
+        if inp is None:
+            raise ValueError("Input tensor 'inp' cannot be None.")
+
+        if len(inp.shape) < 2:
+            raise ValueError(
+                f"Input tensor 'inp' must have a rank of at least 2 "
+                f"(e.g., [batch, features]), but got rank {len(inp.shape)}."
+            )
+        if ops.size(inp) == 0:
+            raise ValueError("Input tensor 'inp' cannot be empty.")
+
         if len(inp.shape) > 2:
             inp = ops.reshape(inp, (-1, inp.shape[-1]))
         inp = ops.cast(inp, "float32")
@@ -103,7 +112,7 @@ def update_hessian_with_batch(self, inp):
         self.nsamples += inp.shape[0]
 
     def quantize_and_correct_block(
-        self, blocksize=128, percdamp=0.01, groupsize=-1, actorder=False
+        self, blocksize=128, percdamp=0.01, group_size=-1, actorder=False
     ):
         """
         Performs GPTQ quantization and correction on the layer's weights.
@@ -143,7 +152,7 @@ def quantize_and_correct_block(
             percdamp (float, optional): The percentage of dampening to add the
                 Hessian's diagonal. A value of 0.01 is recommended.
                 Defaults to 0.01.
-            groupsize (int, optional): The number of weights that share the
+            group_size (int, optional): The number of weights that share the
                 same quantization parameters (scale and zero-point).
                 A value of -1 indicates per-channel quantization.
             actorder (bool, optional): If True, reorders weight columns based
@@ -189,10 +198,10 @@ def quantize_and_correct_block(
                 w = W1[:, i]
                 d = Hinv1[i, i]
 
-                if groupsize != -1:
-                    if (i1 + i) % groupsize == 0:
+                if group_size != -1:
+                    if (i1 + i) % group_size == 0:
                         self.quantizer.find_params(
-                            W[:, (i1 + i) : (i1 + i + groupsize)], weight=True
+                            W[:, (i1 + i) : (i1 + i + group_size)], weight=True
                         )
                 else:
                     self.quantizer.find_params(
diff --git a/keras/src/quantizers/gptq_config.py b/keras/src/quantizers/gptq_config.py
@@ -6,45 +6,45 @@
 
 @keras_export(["keras.GPTQConfig", "keras.quantizers.GPTQConfig"])
 class GPTQConfig:
-    """
-    Configuration class for the GPTQ (Generative Pre-trained Transformer
-    Quantization) algorithm.
+    """Configuration class for the GPTQ algorithm.
 
     This class holds all the parameters needed to apply the GPTQ method
-    to a model. Its attributes are based on the original command-line
-    arguments from  the research repository's `opt.py` script.
+    to a model.
 
     Args:
-        dataset (str): Path to the calibration dataset.
-        wbits (int, optional): The number of bits to quantize the weights to.
+        dataset: The calibration dataset. It can be an iterable that yields
+            strings or pre-tokenized numerical tensors (e.g., a list of
+            strings, a generator, or a NumPy array). This data is used to
+            analyze the model's activations.
+        tokenizer: A `keras_nlp.Tokenizer` instance (or a similar callable)
+            that is used to process the `dataset` if it contains strings.
+        wbits (int, optional): The number of bits to quantize weights to.
             Defaults to 4.
         nsamples (int, optional): The number of calibration data samples to
-                                  use. Defaults to 128.
-        seqlen (int, optional): The sequence length to use for calibration.
-            Defaults to 512.
-        percdamp (float, optional): The percentage of Hessian damping to use.
-            Defaults to 0.01.
-        groupsize (int, optional): The size of the group of weights to
-                                    quantize together.A groupsize of
-                                    -1 means quantization is done per-column.
-                                     Defaults to 128.
-        symmetric (bool, optional): If True, uses symmetric quantization.
-                                    If False,uses asymmetric quantization.
-                                    Defaults to False.
-        act_order (bool, optional): If True, quantizes columns in order of
-                                    decreasing activation size.
-                                    Defaults to False.
+            use from the dataset. Defaults to 128.
+        seqlen (int, optional): The sequence length to use for each calibration
+            sample. Defaults to 512.
+        percdamp (float, optional): The percentage of Hessian damping to
+            use for stabilization during inverse calculation. Defaults to 0.01.
+        group_size (int, optional): The size of weight groups to quantize
+            together. A `group_size` of -1 indicates per-channel quantization.
+            Defaults to 128.
+        symmetric (bool, optional): If `True`, uses symmetric quantization.
+            If `False`, uses asymmetric quantization. Defaults to `False`.
+        act_order (bool, optional): If `True`, reorders weight columns based on
+            activation magnitude, which can improve quantization accuracy.
+            Defaults to `False`.
     """
 
     def __init__(
         self,
         dataset,
-        tokenizer: str,
+        tokenizer,
         wbits: int = 4,
         nsamples: int = 128,
         seqlen: int = 512,
         percdamp: float = 0.01,
-        groupsize: int = 128,
+        group_size: int = 128,
         symmetric: bool = False,
         act_order: bool = False,
     ):
@@ -54,7 +54,7 @@ def __init__(
         self.seqlen = seqlen
         self.percdamp = percdamp
         self.wbits = wbits
-        self.groupsize = groupsize
+        self.group_size = group_size
         self.symmetric = symmetric
         self.act_order = act_order
         self.quantization_method = "gptq"
diff --git a/keras/src/quantizers/gptq_test.py b/keras/src/quantizers/gptq_test.py
@@ -0,0 +1,103 @@
+import numpy as np
+import pytest
+
+from keras.src import layers
+from keras.src import ops
+from keras.src import testing
+from keras.src.quantizers.gptq import GPTQ
+from keras.src.quantizers.gptqquant import GPTQQuant
+
+
+def _get_mock_layer(layer_type, kernel_shape, rng):
+    if layer_type == "Dense":
+        layer = layers.Dense(units=kernel_shape[1])
+        layer.build(input_shape=(None, kernel_shape[0]))
+    elif layer_type == "EinsumDense":
+        output_shape = (kernel_shape[1], kernel_shape[2])
+        layer = layers.EinsumDense(
+            equation="...h,hio->...io", output_shape=output_shape
+        )
+        dummy_input = rng.standard_normal(size=(1, 1, kernel_shape[0]))
+        layer(dummy_input)
+        layer.kernel.assign(
+            rng.standard_normal(size=kernel_shape).astype("float32")
+        )
+    else:
+        layer = layers.Layer()
+    return layer
+
+
+@pytest.mark.requires_trainable_backend
+class GPTQTest(testing.TestCase):
+    def test_initialization_with_dense_layer(self):
+        rng = np.random.default_rng(seed=42)
+
+        mock_layer = _get_mock_layer("Dense", kernel_shape=(64, 128), rng=rng)
+
+        gptq_instance = GPTQ(mock_layer)
+        self.assertEqual(gptq_instance.rows, 64)
+        self.assertEqual(gptq_instance.columns, 128)
+        self.assertEqual(gptq_instance.H.shape, (64, 64))
+
+    def test_initialization_with_einsumdense_3d(self):
+        rng = np.random.default_rng(seed=42)
+        mock_layer = _get_mock_layer(
+            "EinsumDense", kernel_shape=(64, 4, 32), rng=rng
+        )
+        gptq_instance = GPTQ(mock_layer)
+        self.assertEqual(gptq_instance.rows, 64)
+        self.assertEqual(gptq_instance.columns, 4 * 32)
+        self.assertEqual(gptq_instance.H.shape, (64, 64))
+
+    def test_update_hessian(self):
+        rng = np.random.default_rng(seed=42)
+        mock_layer = _get_mock_layer("Dense", kernel_shape=(16, 32), rng=rng)
+        gptq_instance = GPTQ(mock_layer)
+        batch1 = rng.standard_normal(size=(8, 16)).astype("float32")
+        gptq_instance.update_hessian_with_batch(batch1)
+        self.assertEqual(gptq_instance.nsamples, 8)
+        H1 = np.copy(ops.convert_to_numpy(gptq_instance.H))
+        batch2 = rng.standard_normal(size=(4, 16)).astype("float32")
+        gptq_instance.update_hessian_with_batch(batch2)
+        self.assertEqual(gptq_instance.nsamples, 12)
+        H2 = np.copy(ops.convert_to_numpy(gptq_instance.H))
+        self.assertFalse(np.allclose(H1, H2))
+
+    def test_full_quantization_process(self):
+        rng = np.random.default_rng(seed=42)
+        mock_layer = _get_mock_layer("Dense", kernel_shape=(16, 32), rng=rng)
+        original_weights = np.copy(ops.convert_to_numpy(mock_layer.kernel))
+
+        gptq_instance = GPTQ(mock_layer)
+        gptq_instance.quantizer = GPTQQuant()
+        gptq_instance.quantizer.configure(wbits=4, symmetric=False)
+
+        calibration_data = rng.standard_normal(size=(128, 16)).astype("float32")
+        gptq_instance.update_hessian_with_batch(calibration_data)
+        gptq_instance.quantize_and_correct_block()
+
+        quantized_weights = ops.convert_to_numpy(mock_layer.kernel)
+        self.assertFalse(np.allclose(original_weights, quantized_weights))
+
+        gptq_instance.free()
+        self.assertIsNone(gptq_instance.H)
+
+    def test_unsupported_layer_error(self):
+        rng = np.random.default_rng(seed=42)
+        unsupported_layer = _get_mock_layer(
+            "Unsupported", kernel_shape=None, rng=rng
+        )
+        with self.assertRaisesRegex(TypeError, "Unsupported layer type"):
+            GPTQ(unsupported_layer)
+
+    def test_update_hessian_invalid_input(self):
+        rng = np.random.default_rng(seed=42)
+        mock_layer = _get_mock_layer("Dense", kernel_shape=(16, 32), rng=rng)
+        gptq_instance = GPTQ(mock_layer)
+        with self.assertRaisesRegex(ValueError, "cannot be None"):
+            gptq_instance.update_hessian_with_batch(None)
+        with self.assertRaisesRegex(ValueError, "cannot be empty"):
+            gptq_instance.update_hessian_with_batch(np.empty((0, 16)))
+        with self.assertRaisesRegex(ValueError, "match input features"):
+            bad_input = rng.standard_normal(size=(8, 99))
+            gptq_instance.update_hessian_with_batch(bad_input)
diff --git a/keras/src/quantizers/gptqquant.py b/keras/src/quantizers/gptqquant.py
diff --git a/keras/src/quantizers/gptqutils.py b/keras/src/quantizers/gptqutils.py