Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit 6e1cdd5

Browse files
committed
Update Error check
1 parent 4e2bee8 commit 6e1cdd5

File tree

1 file changed

+6
-8
lines changed

1 file changed

+6
-8
lines changed

torchchat/utils/quantize.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252

5353

5454
# Flag for whether the a8wxdq quantizer is available.
55-
a8wxdq_loaded = False
55+
a8wxdq_load_error: Optional[Exception] = None
5656

5757
#########################################################################
5858
### torchchat quantization API ###
@@ -79,6 +79,10 @@ def quantize_model(
7979
quantize_options = json.loads(quantize_options)
8080

8181
for quantizer, q_kwargs in quantize_options.items():
82+
# Test if a8wxdq quantizer is available; Surface error if not.
83+
if quantizer == "linear:a8wxdq" and a8wxdq_load_error is not None:
84+
raise Exception(f"Note: Failed to load torchao experimental a8wxdq quantizer with error: {a8wxdq_load_error}")
85+
8286
if (
8387
quantizer not in quantizer_class_dict
8488
and quantizer not in ao_quantizer_class_dict
@@ -100,9 +104,6 @@ def quantize_model(
100104

101105
try:
102106
if quantizer == "linear:a8wxdq":
103-
if not a8wxdq_loaded:
104-
raise Exception(f"Note: Failed to load torchao experimental a8wxdq quantizer with error: {e}")
105-
106107
quant_handler = ao_quantizer_class_dict[quantizer](
107108
device=device,
108109
precision=precision,
@@ -904,8 +905,5 @@ def quantized_model(self) -> nn.Module:
904905
print("Failed to load torchao ops library with error: ", e)
905906
print("Slow fallback kernels will be used.")
906907

907-
# Mark the Quant option as available
908-
a8wxdq_loaded = True
909-
910908
except Exception as e:
911-
pass
909+
a8wxdq_load_error = e

0 commit comments

Comments
 (0)