We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent f91f6d1 · commit 326f802 · Copy full SHA for 326f802
src/compressed_tensors/quantization/lifecycle/initialize.py
@@ -234,6 +234,12 @@ def initialize_qparams(
234
num_cols = strategy_cdiv(observed_shape[-1], block_structure[-1], strategy)
235
expected_shape = (num_rows, num_cols)
236
237
+ elif strategy == QuantizationStrategy.ATTN_HEAD:
238
+ if len(observed_shape) < 2:
239
+ raise ValueError("Attention quant requires at least 2 observed dimensions")
240
+
241
+ expected_shape = (observed_shape[-2], 1)
242
243
else:
244
assert False, f"Unknown strategy {strategy}"
245
0 commit comments