@@ -40,30 +40,46 @@
 from torch.ao.quantization.quantizer.composable_quantizer import ComposableQuantizer
 
 
-act_qspec = QuantizationSpec(
-    dtype=torch.uint8,
-    quant_min=0,
-    quant_max=255,
+act_qspec_asym8u = QuantizationSpec(
+    dtype=torch.int8,
+    quant_min=-128,
+    quant_max=127,
     qscheme=torch.per_tensor_affine,
     is_dynamic=False,
     observer_or_fake_quant_ctr=HistogramObserver.with_args(eps=2**-12),
 )
 
-wgt_qspec = QuantizationSpec(
-    dtype=torch.uint8,
-    quant_min=0,
-    quant_max=255,
+wgt_qspec_asym8u = QuantizationSpec(
+    dtype=torch.int8,
+    quant_min=-128,
+    quant_max=127,
     qscheme=torch.per_tensor_affine,
     is_dynamic=False,
     observer_or_fake_quant_ctr=MinMaxObserver,
 )
 
+wgt_qspec_asym8s = QuantizationSpec(
+    dtype=torch.int8,
+    quant_min=-128,
+    quant_max=127,
+    qscheme=torch.per_tensor_symmetric,
+    is_dynamic=False,
+    observer_or_fake_quant_ctr=MinMaxObserver,
+)
+
 bias_qspec: Optional[QuantizationSpec] = None
 
-_default_qconfig = QuantizationConfig(
-    act_qspec,
-    act_qspec,
-    wgt_qspec,
+qconfig_A8uW8u = QuantizationConfig(
+    act_qspec_asym8u,
+    act_qspec_asym8u,
+    wgt_qspec_asym8u,
+    None,
+)
+
+qconfig_A8uW8s = QuantizationConfig(
+    act_qspec_asym8u,
+    act_qspec_asym8u,
+    wgt_qspec_asym8s,
     None,
 )
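Aside, not part of the diff: the only field separating the new wgt_qspec_asym8s from wgt_qspec_asym8u is the qscheme, and its practical effect is the zero point. A minimal sketch using MinMaxObserver, the same observer constructor both weight specs reference, illustrates the difference; the tensor values are made up for illustration.

    import torch
    from torch.ao.quantization.observer import MinMaxObserver

    w = torch.tensor([-0.2, 0.1, 0.5, 1.0])  # toy weights with a skewed range

    # per_tensor_affine (wgt_qspec_asym8u): the zero point floats so that the
    # observed [min, max] maps onto the full [-128, 127] range.
    affine = MinMaxObserver(
        dtype=torch.int8, quant_min=-128, quant_max=127,
        qscheme=torch.per_tensor_affine,
    )
    affine(w)
    print(affine.calculate_qparams())  # scale ≈ 1.2 / 255, zero_point ≈ -86

    # per_tensor_symmetric (wgt_qspec_asym8s): the zero point is pinned to 0 and
    # the scale comes from max(|min|, |max|), so skewed data wastes some range.
    sym = MinMaxObserver(
        dtype=torch.int8, quant_min=-128, quant_max=127,
        qscheme=torch.per_tensor_symmetric,
    )
    sym(w)
    print(sym.calculate_qparams())  # scale ≈ 1.0 / 127.5, zero_point = 0

A weight zero point of 0 is what lets fixed-point conv kernels drop the weight zero-point correction term, which is presumably why the conv patterns below move to the symmetric-weight config.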
@@ -147,19 +163,17 @@ def get_supported_operators(cls) -> List[OperatorConfig]:
         return []
 
 
-def get_cadence_default_quantizer_list_with_config(
-    quantization_config: QuantizationConfig,
-) -> List[Quantizer]:
+def get_cadence_default_quantizers() -> List[Quantizer]:
     return [
-        CadenceAtenQuantizer(AddmmPattern(), quantization_config),
-        CadenceAtenQuantizer(BmmPattern(), quantization_config),
-        CadenceAtenQuantizer(Conv1dPattern(), quantization_config),
-        CadenceAtenQuantizer(Conv2dPattern(), quantization_config),
-        CadenceAtenQuantizer(LayerNormPattern(), quantization_config),
-        CadenceAtenQuantizer(LinearPattern(), quantization_config),
-        CadenceAtenQuantizer(MatmulPattern(), quantization_config),
-        CadenceAtenQuantizer(ReluPattern0(), quantization_config),
-        CadenceAtenQuantizer(ReluPattern1(), quantization_config),
+        CadenceAtenQuantizer(AddmmPattern(), qconfig_A8uW8u),
+        CadenceAtenQuantizer(BmmPattern(), qconfig_A8uW8u),
+        CadenceAtenQuantizer(Conv1dPattern(), qconfig_A8uW8s),
+        CadenceAtenQuantizer(Conv2dPattern(), qconfig_A8uW8s),
+        CadenceAtenQuantizer(LayerNormPattern(), qconfig_A8uW8u),
+        CadenceAtenQuantizer(LinearPattern(), qconfig_A8uW8u),
+        CadenceAtenQuantizer(MatmulPattern(), qconfig_A8uW8u),
+        CadenceAtenQuantizer(ReluPattern0(), qconfig_A8uW8u),
+        CadenceAtenQuantizer(ReluPattern1(), qconfig_A8uW8u),
     ]
 
 
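Aside, not part of the diff: the default list now binds a config per pattern (the conv patterns get the symmetric-weight qconfig_A8uW8s, everything else qconfig_A8uW8u) instead of threading one caller-supplied config through every pattern. A caller that wants a different mix builds the list itself; a sketch, with the ExecuTorch import paths assumed from this module's location:

    # Sketch only: import paths are assumptions based on the source tree layout.
    from executorch.backends.cadence.aot.quantizer.patterns import (
        Conv2dPattern,
        LinearPattern,
    )
    from executorch.backends.cadence.aot.quantizer.quantizer import (
        CadenceAtenQuantizer,
        qconfig_A8uW8s,
        qconfig_A8uW8u,
    )

    # Quantize only conv2d and linear; keep asymmetric weights for linear.
    custom = [
        CadenceAtenQuantizer(Conv2dPattern(), qconfig_A8uW8s),
        CadenceAtenQuantizer(LinearPattern(), qconfig_A8uW8u),
    ]

The list plugs into the reworked constructor shown in the next hunk.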
@@ -178,10 +192,9 @@ class CadenceDefaultQuantizer(CadenceQuantizer):
     Default quantizer for Cadence backend.
     """
 
-    def __init__(self, qconfig: Optional[QuantizationConfig] = None) -> None:
-        if qconfig is None:
-            qconfig = _default_qconfig
-        quantizers = get_cadence_default_quantizer_list_with_config(qconfig)
+    def __init__(self, quantizers: Optional[list[Quantizer]] = None) -> None:
+        if quantizers is None:
+            quantizers = get_cadence_default_quantizers()
         super().__init__(quantizers)
 
187200
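Aside, not part of the diff: the constructor is the migration point for downstream callers; it now accepts a ready-made quantizer list rather than a single QuantizationConfig. A before/after sketch, continuing the assumptions above:

    from executorch.backends.cadence.aot.quantizer.quantizer import (
        CadenceDefaultQuantizer,  # import path assumed, as above
    )

    # Old API (removed): CadenceDefaultQuantizer(qconfig=some_config)
    quantizer = CadenceDefaultQuantizer()  # defaults via get_cadence_default_quantizers()
    quantizer = CadenceDefaultQuantizer(quantizers=custom)  # `custom` from the previous sketch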