
Commit a173a0c

dsikka and rahul-tuli authored
Update example to not fail hessian inversion (#904)
* update

Signed-off-by: Dipika <[email protected]>

* quality

---------

Signed-off-by: Dipika <[email protected]>
Co-authored-by: Rahul Tuli <[email protected]>
1 parent 644a500 commit a173a0c

File tree

1 file changed: +3 additions, -1 deletion

examples/big_models_with_accelerate/multi_gpu_int8.py

Lines changed: 3 additions & 1 deletion
@@ -59,7 +59,9 @@ def tokenize(sample):
 # * quantize the weights to int8 with GPTQ (static per channel)
 # * quantize the activations to int8 (dynamic per token)
 recipe = [
-    GPTQModifier(targets="Linear", scheme="W8A8", ignore=["lm_head"]),
+    GPTQModifier(
+        targets="Linear", scheme="W8A8", ignore=["lm_head"], dampening_frac=0.1
+    ),
 ]

 # 4) Apply algorithms and save in `compressed-tensors` format.
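
For reference, a minimal sketch of the recipe after this change. The import path is assumed to be the one used elsewhere in the llm-compressor examples (llmcompressor.modifiers.quantization), and the comment on dampening_frac describes the general GPTQ dampening behavior rather than anything stated in the commit itself.

# Assumed import path, as used in other llm-compressor examples.
from llmcompressor.modifiers.quantization import GPTQModifier

# dampening_frac (roughly) adds a fraction of the Hessian's average diagonal
# back onto its diagonal before inversion; raising it above the library
# default keeps the Hessian better conditioned, which is what prevents the
# inversion failure this commit works around.
recipe = [
    GPTQModifier(
        targets="Linear",        # quantize all Linear layers
        scheme="W8A8",           # int8 weights (static per channel), int8 activations (dynamic per token)
        ignore=["lm_head"],      # leave the output head unquantized
        dampening_frac=0.1,
    ),
]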
