
Commit 0873f28

[GPTQ] Add inversion fallback (#1283)
## Purpose ##

* Given the increasing size of large language models (for example, DeepSeek-R1 contains 45,034 linear layers), the likelihood that at least one Hessian inversion will spontaneously fail is significant.
* These changes cause the GPTQ algorithm to fall back to RTN (round-to-nearest) for any layer whose Hessian inversion fails.

## Changes ##

* Implement the fallback by setting the Hessian to the identity matrix when inversion fails.

---------

Signed-off-by: Kyle Sayers <[email protected]>
1 parent 81271b5 commit 0873f28

File tree

1 file changed: +5 −2 lines changed

src/llmcompressor/modifiers/quantization/gptq/gptq_quantize.py

Lines changed: 5 additions & 2 deletions
@@ -10,6 +10,7 @@
     QuantizationStrategy,
     fake_quantize,
 )
+from loguru import logger
 
 from llmcompressor.modifiers.utils import SPARSITY_THRESHOLD
 from llmcompressor.observers.base import Observer
@@ -161,11 +162,13 @@ def quantize_weight(
         H = torch.linalg.cholesky(H, upper=True)
         Hinv = H
     except torch._C._LinAlgError:
-        raise torch._C._LinAlgError(
+        logger.warning(
             "Failed to invert hessian due to numerical instability. Consider "
             "increasing GPTQModifier.dampening_frac, increasing the number "
-            "of calibration samples, or shuffling the calibration dataset"
+            "of calibration samples, or shuffling the calibration dataset. "
+            "Falling back to round-to-nearest for this module."
         )
+        Hinv = H = torch.eye(num_columns, dtype=H.dtype, device=H.device)
 
     # See section 3.4 of https://arxiv.org/abs/2203.07259
     for i1 in range(0, num_columns, blocksize):
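For intuition, here is a minimal sketch of why substituting the identity matrix for the inverse Hessian reduces the GPTQ column loop to plain round-to-nearest. This is toy code with assumed names (`gptq_columns`, a simplified `fake_quantize`), not the repository's implementation: the error-compensation term is scaled by the off-diagonal entries of `Hinv`, which are all zero for the identity, so each column is simply rounded independently.

```python
import torch


def fake_quantize(w: torch.Tensor, scale: float = 0.1) -> torch.Tensor:
    # Toy symmetric quantizer, for illustration only.
    return torch.round(w / scale) * scale


def gptq_columns(W: torch.Tensor, Hinv: torch.Tensor) -> torch.Tensor:
    # Simplified per-column GPTQ loop: quantize a column, then propagate the
    # quantization error to the remaining columns via the inverse Hessian.
    W = W.clone()
    Q = torch.zeros_like(W)
    for i in range(W.shape[1]):
        q = fake_quantize(W[:, i])
        Q[:, i] = q
        err = (W[:, i] - q) / Hinv[i, i]
        # With Hinv = identity, Hinv[i, i + 1:] is all zeros, so no error is
        # propagated and every column is just rounded (i.e. plain RTN).
        W[:, i + 1:] -= err.unsqueeze(1) * Hinv[i, i + 1:].unsqueeze(0)
    return Q


W = torch.randn(4, 8)
# The identity "inverse Hessian" fallback matches round-to-nearest exactly.
assert torch.allclose(fake_quantize(W), gptq_columns(W, torch.eye(W.shape[1])))
```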
