Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit 9bbbc87

Browse files
authored
Raise a8wxdq load errors only when quant scheme is used (#1231)
* Show the a8wxdq load error only when that quantization scheme is used
* Update the error check
1 parent a318b04 commit 9bbbc87

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

torchchat/utils/quantize.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@
5151
)
5252

5353

54+
# Flag for whether the a8wxdq quantizer is available.
55+
a8wxdq_load_error: Optional[Exception] = None
56+
5457
#########################################################################
5558
### torchchat quantization API ###
5659

@@ -76,6 +79,10 @@ def quantize_model(
7679
quantize_options = json.loads(quantize_options)
7780

7881
for quantizer, q_kwargs in quantize_options.items():
82+
# Test if a8wxdq quantizer is available; Surface error if not.
83+
if quantizer == "linear:a8wxdq" and a8wxdq_load_error is not None:
84+
raise Exception(f"Note: Failed to load torchao experimental a8wxdq quantizer with error: {a8wxdq_load_error}")
85+
7986
if (
8087
quantizer not in quantizer_class_dict
8188
and quantizer not in ao_quantizer_class_dict
@@ -899,4 +906,4 @@ def quantized_model(self) -> nn.Module:
899906
print("Slow fallback kernels will be used.")
900907

901908
except Exception as e:
902-
print(f"Failed to load torchao experimental a8wxdq quantizer with error: {e}")
909+
a8wxdq_load_error = e

0 commit comments

Comments (0)