Fixed GPTQ algorithm to update weights with slice updates instead of concatenation

amitsrivastava78 · amitsrivastava78 · commit 7a901481fae1 · 2025-08-06T13:01:01.000+05:30
diff --git a/keras/src/models/model_test.py b/keras/src/models/model_test.py
@@ -1391,8 +1391,3 @@ def test_quantize_gptq_with_data_gen(self):
             nsamples=16, seqlen=128, vocab_size=1000
         )
         _run_gptq_test_on_dataset(self, generator_dataset)
-
-    @pytest.mark.slow
-    def test_quantize_gptq_with_wikitext2(self):
-        """Tests GPTQ with the 'wikitext2' dataset identifier."""
-        _run_gptq_test_on_dataset(self, "wikitext2")
diff --git a/keras/src/quantizers/gptq.py b/keras/src/quantizers/gptq.py
@@ -135,23 +135,23 @@ def quantize_and_correct_block(
                     self.quantizer.maxq,
                 )[:, 0]
 
-                Q1 = ops.concatenate(
-                    [Q1[:, :i], ops.expand_dims(q, 1), Q1[:, i + 1 :]], axis=1
-                )
+                Q1 = ops.slice_update(Q1, (0, i), ops.expand_dims(q, axis=1))
                 err = (w - q) / d
-                Err1 = ops.concatenate(
-                    [Err1[:, :i], ops.expand_dims(err, 1), Err1[:, i + 1 :]],
-                    axis=1,
+                Err1 = ops.slice_update(
+                    Err1, (0, i), ops.expand_dims(err, axis=1)
                 )
 
                 if i < count - 1:
                     update = ops.matmul(
                         ops.expand_dims(err, 1),
                         ops.expand_dims(Hinv1[i, i + 1 :], 0),
                     )
-                    W1 = ops.concatenate(
-                        [W1[:, : i + 1], W1[:, i + 1 :] - update], axis=1
-                    )
+
+                    # Efficiently update the remaining part of the W1 tensor.
+                    # This is equivalent to W1[:, i + 1 :] -= update
+                    slice_to_update = W1[:, i + 1 :]
+                    updated_slice = slice_to_update - update
+                    W1 = ops.slice_update(W1, (0, i + 1), updated_slice)
 
             Q = ops.concatenate([Q[:, :i1], Q1, Q[:, i2:]], axis=1)
 

Original file line number	Diff line number	Diff line change
`@@ -1391,8 +1391,3 @@ def test_quantize_gptq_with_data_gen(self):`
`1391`	`1391`	`nsamples=16, seqlen=128, vocab_size=1000`
`1392`	`1392`	`)`
`1393`	`1393`	`_run_gptq_test_on_dataset(self, generator_dataset)`
`1394`		`-`
`1395`		`- @pytest.mark.slow`
`1396`		`- def test_quantize_gptq_with_wikitext2(self):`
`1397`		`- """Tests GPTQ with the 'wikitext2' dataset identifier."""`
`1398`		`- _run_gptq_test_on_dataset(self, "wikitext2")`