
Commit 4e2bee8

Show a8wxdq load error only when the quant is used
1 parent 8278aa2 · commit 4e2bee8

File tree

1 file changed (+10 −1)

torchchat/utils/quantize.py

Lines changed: 10 additions & 1 deletion
@@ -51,6 +51,9 @@
 )
 
 
+# Flag for whether the a8wxdq quantizer is available.
+a8wxdq_loaded = False
+
 #########################################################################
 ###                  torchchat quantization API                       ###
 
@@ -97,6 +100,9 @@ def quantize_model(
 
         try:
             if quantizer == "linear:a8wxdq":
+                if not a8wxdq_loaded:
+                    raise Exception("Failed to load torchao experimental a8wxdq quantizer.")
+
                 quant_handler = ao_quantizer_class_dict[quantizer](
                     device=device,
                     precision=precision,
@@ -898,5 +904,8 @@ def quantized_model(self) -> nn.Module:
         print("Failed to load torchao ops library with error: ", e)
         print("Slow fallback kernels will be used.")
 
+    # Mark the quant option as available.
+    a8wxdq_loaded = True
+
 except Exception as e:
-    print(f"Failed to load torchao experimental a8wxdq quantizer with error: {e}")
+    pass
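
For context, here is a minimal, self-contained sketch of the deferred-error pattern this commit applies: a module-level flag records whether an optional backend imported cleanly, and the import failure is surfaced only when a caller actually selects that backend. All names below (some_optional_backend, make_quantizer) are hypothetical stand-ins, not the actual torchchat code.

# Sketch: report an optional backend's load failure only on use.
# Hypothetical names throughout; not the actual torchchat code.
import importlib

_backend_loaded = False  # module-level flag, analogous to a8wxdq_loaded

try:
    # "some_optional_backend" stands in for the torchao experimental ops.
    _backend = importlib.import_module("some_optional_backend")
    _backend_loaded = True
except Exception:
    # Stay quiet at import time; the failure is reported only when used.
    _backend = None


def make_quantizer(name: str):
    """Return a quantizer callable; fail only if the missing backend is requested."""
    if name == "optional" and not _backend_loaded:
        raise RuntimeError(
            "Failed to load the optional backend; "
            "install it or pick a different quantizer."
        )
    # Every other quantizer keeps working even when the backend is absent.
    return lambda model: model

With this structure, users who never request the optional quantizer see no spurious message at import time, which matches the behavior described in the commit title.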
