Commit aca81d6

raise error if block quantization is used, as it is not yet supported (#1476)
SUMMARY: More info at #1464 and #1475. For now, just raise an error if the user tries to block-quantize. This provides a more useful error than the downstream error reported in #1464. TEST PLAN: n/a. Signed-off-by: Brian Dellabetta <[email protected]>
1 parent 92cbf01 commit aca81d6

File tree: 1 file changed (+8, -0 lines)


src/llmcompressor/observers/base.py

Lines changed: 8 additions & 0 deletions
@@ -145,6 +145,14 @@ def get_qparams(
                 dim={0, 1},
             )

+        elif self.quantization_args.strategy == QuantizationStrategy.BLOCK:
+            # TODO (#1475) add support for block-wise quantization
+            raise NotImplementedError(
+                "Block-wise quantization is not yet supported, "
+                "consider group-wise quantization instead. More info at "
+                "https://github.com/vllm-project/llm-compressor/issues/1475"
+            )
+
         return self._scale, self._zero_point

     def get_qparams_along_dim(
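The change is a fail-fast guard: when the unsupported BLOCK strategy is selected, the observer raises immediately with an actionable message instead of letting the user hit a confusing downstream error. Below is a minimal, self-contained sketch of this pattern; the `QuantizationStrategy` enum and `get_qparams` function here are simplified stand-ins for the real llm-compressor/compressed-tensors APIs, not the actual implementation.

```python
from enum import Enum


class QuantizationStrategy(Enum):
    # Simplified stand-in for compressed-tensors' QuantizationStrategy
    TENSOR = "tensor"
    GROUP = "group"
    BLOCK = "block"


def get_qparams(strategy: QuantizationStrategy):
    """Sketch of the observer's get_qparams with the new guard."""
    if strategy == QuantizationStrategy.BLOCK:
        # Fail fast with an actionable error instead of a confusing
        # downstream failure (see #1464).
        raise NotImplementedError(
            "Block-wise quantization is not yet supported, "
            "consider group-wise quantization instead. More info at "
            "https://github.com/vllm-project/llm-compressor/issues/1475"
        )
    # Placeholder for the real scale/zero-point computation.
    return "scale", "zero_point"
```

Usage: supported strategies return quantization parameters as before, while BLOCK raises `NotImplementedError` at configuration time rather than mid-run.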
