Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion src/compressed_tensors/quantization/quant_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ def validate_model_after(model: "QuantizationArgs") -> "QuantizationArgs":
# extract user-passed values from dictionary
strategy = model.strategy
group_size = model.group_size
block_structure = model.block_structure
actorder = model.actorder
dynamic = model.dynamic
observer = model.observer
Expand All @@ -277,7 +278,7 @@ def validate_model_after(model: "QuantizationArgs") -> "QuantizationArgs":
"strategy='group' and group_size = -1 for 'channel'"
)

# validate strategy and group
# validate group strategy
if strategy == QuantizationStrategy.GROUP:
if group_size is None or group_size <= 0:
raise ValueError(
Expand All @@ -292,6 +293,14 @@ def validate_model_after(model: "QuantizationArgs") -> "QuantizationArgs":
):
raise ValueError("group_size requires strategy to be set to 'group'")

# validate block strategy
has_block_strategy = strategy == QuantizationStrategy.BLOCK
has_block_structure = block_structure is not None
if has_block_strategy and not has_block_structure:
raise ValueError(f"Block strategy requires block structure\n{model}")
if has_block_structure and not has_block_strategy:
raise ValueError(f"Block structure requires block strategy\n{model}")

# validate activation ordering and strategy
if actorder is not None and strategy != QuantizationStrategy.GROUP:
raise ValueError(
Expand Down
20 changes: 13 additions & 7 deletions tests/test_quantization/test_utils/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,36 +32,42 @@
[
(
False,
QuantizationStrategy.TENSOR,
"tensor",
torch.Size(
[
1,
]
),
),
(True, QuantizationStrategy.CHANNEL, torch.Size([1, 1])),
(True, QuantizationStrategy.GROUP, torch.Size([1, 1])),
(True, "channel", torch.Size([1, 1])),
(True, "group", torch.Size([1, 1])),
(
False,
QuantizationStrategy.BLOCK,
"block",
torch.Size(
[
1,
]
),
),
(True, QuantizationStrategy.TOKEN, torch.Size([1, 1])),
(True, "token", torch.Size([1, 1])),
],
)
def test_calculate_qparams(keepdims, strategy, exp_shape):
value = torch.randn(14, 5)
value = torch.empty(5, 6)
min_val = torch.amin(value, dim=tuple(), keepdims=keepdims)
max_val = torch.amax(value, dim=tuple(), keepdims=keepdims)

if strategy == QuantizationStrategy.GROUP:
args = QuantizationArgs(strategy=strategy, group_size=2)
elif strategy == QuantizationStrategy.BLOCK:
args = QuantizationArgs(strategy=strategy, block_structure=[1, 3])
else:
args = QuantizationArgs(strategy=strategy)
args = QuantizationArgs(
strategy=strategy,
group_size=(2 if strategy == "group" else None),
block_structure=([1, 3] if strategy == "block" else None),
)
scale, zp = calculate_qparams(min_val, max_val, args)
assert scale.shape == exp_shape
assert zp.shape == exp_shape
Expand Down