This repository was archived by the owner on Sep 10, 2025. It is now read-only.
1 file changed: +6 -8 lines changed
Original file line number | Diff line number | Diff line change
5252
5353
5454# Flag for whether the a8wxdq quantizer is available.
55- a8wxdq_loaded = False
55+ a8wxdq_load_error : Optional [ Exception ] = None
5656
5757#########################################################################
5858### torchchat quantization API ###
@@ -79,6 +79,10 @@ def quantize_model(
7979 quantize_options = json .loads (quantize_options )
8080
8181 for quantizer , q_kwargs in quantize_options .items ():
82+ # Test if a8wxdq quantizer is available; Surface error if not.
83+ if quantizer == "linear:a8wxdq" and a8wxdq_load_error is not None :
84+ raise Exception (f"Note: Failed to load torchao experimental a8wxdq quantizer with error: { a8wxdq_load_error } " )
85+
8286 if (
8387 quantizer not in quantizer_class_dict
8488 and quantizer not in ao_quantizer_class_dict
@@ -100,9 +104,6 @@ def quantize_model(
100104
101105 try :
102106 if quantizer == "linear:a8wxdq" :
103- if not a8wxdq_loaded :
104- raise Exception (f"Note: Failed to load torchao experimental a8wxdq quantizer with error: { e } " )
105-
106107 quant_handler = ao_quantizer_class_dict [quantizer ](
107108 device = device ,
108109 precision = precision ,
@@ -904,8 +905,5 @@ def quantized_model(self) -> nn.Module:
904905 print ("Failed to torchao ops library with error: " , e )
905906 print ("Slow fallback kernels will be used." )
906907
907- # Mark the Quant option as available
908- a8wxdq_loaded = True
909-
910908except Exception as e :
911- pass
909+ a8wxdq_load_error = e
You can’t perform that action at this time.
0 commit comments