@@ -43,7 +43,7 @@
 from torch.ao.quantization.quantizer.composable_quantizer import ComposableQuantizer
 
 
-act_qspec_asym8u = QuantizationSpec(
+act_qspec_asym8s = QuantizationSpec(
     dtype=torch.int8,
     quant_min=-128,
     quant_max=127,
@@ -52,7 +52,7 @@
     observer_or_fake_quant_ctr=HistogramObserver.with_args(eps=2**-12),
 )
 
-wgt_qspec_asym8u = QuantizationSpec(
+wgt_qspec_asym8s = QuantizationSpec(
     dtype=torch.int8,
     quant_min=-128,
     quant_max=127,
@@ -61,7 +61,7 @@
     observer_or_fake_quant_ctr=MinMaxObserver,
 )
 
-wgt_qspec_asym8s = QuantizationSpec(
+wgt_qspec_sym8s = QuantizationSpec(
     dtype=torch.int8,
     quant_min=-128,
     quant_max=127,
@@ -72,17 +72,17 @@
 
 bias_qspec: Optional[QuantizationSpec] = None
 
-qconfig_A8uW8u = QuantizationConfig(
-    act_qspec_asym8u,
-    act_qspec_asym8u,
-    wgt_qspec_asym8u,
+qconfig_A8W8 = QuantizationConfig(
+    act_qspec_asym8s,
+    act_qspec_asym8s,
+    wgt_qspec_asym8s,
     None,
 )
 
-qconfig_A8uW8s = QuantizationConfig(
-    act_qspec_asym8u,
-    act_qspec_asym8u,
-    wgt_qspec_asym8s,
+qconfig_A8W8sym = QuantizationConfig(
+    act_qspec_asym8s,
+    act_qspec_asym8s,
+    wgt_qspec_sym8s,
     None,
 )
 
@@ -189,15 +189,15 @@ def get_supported_operators(cls) -> List[OperatorConfig]:
 
 def get_cadence_default_quantizers() -> List[Quantizer]:
     return [
-        CadenceAtenQuantizer(AddmmPattern(), qconfig_A8uW8u),
-        CadenceAtenQuantizer(BmmPattern(), qconfig_A8uW8u),
-        CadenceAtenQuantizer(Conv1dPattern(), qconfig_A8uW8s),
-        CadenceAtenQuantizer(Conv2dPattern(), qconfig_A8uW8s),
-        CadenceAtenQuantizer(LayerNormPattern(), qconfig_A8uW8u),
-        CadenceAtenQuantizer(LinearPattern(), qconfig_A8uW8u),
-        CadenceAtenQuantizer(MatmulPattern(), qconfig_A8uW8u),
-        CadenceAtenQuantizer(ReluPattern0(), qconfig_A8uW8u),
-        CadenceAtenQuantizer(ReluPattern1(), qconfig_A8uW8u),
+        CadenceAtenQuantizer(AddmmPattern(), qconfig_A8W8),
+        CadenceAtenQuantizer(BmmPattern(), qconfig_A8W8),
+        CadenceAtenQuantizer(Conv1dPattern(), qconfig_A8W8sym),
+        CadenceAtenQuantizer(Conv2dPattern(), qconfig_A8W8sym),
+        CadenceAtenQuantizer(LayerNormPattern(), qconfig_A8W8),
+        CadenceAtenQuantizer(LinearPattern(), qconfig_A8W8),
+        CadenceAtenQuantizer(MatmulPattern(), qconfig_A8W8),
+        CadenceAtenQuantizer(ReluPattern0(), qconfig_A8W8),
+        CadenceAtenQuantizer(ReluPattern1(), qconfig_A8W8),
     ]
 
 
@@ -244,6 +244,6 @@ class CadenceWakeWordQuantizer(CadenceQuantizer):
     def __init__(self, quantizers: Optional[list[Quantizer]] = None) -> None:
         if quantizers is None:
             quantizers = get_cadence_default_quantizers()
-        quantizers.append(CadenceAtenQuantizer(AddPattern(), qconfig_A8uW8u))
-        quantizers.append(CadenceAtenQuantizer(CatPattern(), qconfig_A8uW8u))
+        quantizers.append(CadenceAtenQuantizer(AddPattern(), qconfig_A8W8))
+        quantizers.append(CadenceAtenQuantizer(CatPattern(), qconfig_A8W8))
         super().__init__(quantizers)
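
For context, a minimal usage sketch of how the renamed default quantizers might be composed and applied with the PT2E flow. It is not part of this change: the executorch import path, the TinyModel placeholder, and the example inputs are assumptions for illustration, and it presumes a recent PyTorch that provides torch.export.export_for_training together with prepare_pt2e/convert_pt2e.

# Minimal sketch, not part of this diff. Assumed names: the executorch import
# path, TinyModel, and example_inputs are placeholders for illustration only.
import torch
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer.composable_quantizer import ComposableQuantizer

from executorch.backends.cadence.aot.quantizer.quantizer import (  # assumed path
    get_cadence_default_quantizers,
)


class TinyModel(torch.nn.Module):
    """Placeholder model that exercises the Linear and ReLU patterns."""

    def __init__(self) -> None:
        super().__init__()
        self.linear = torch.nn.Linear(16, 8)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return torch.relu(self.linear(x))


example_inputs = (torch.randn(1, 16),)

# Export to an ATen-level graph, then annotate it with the default quantizers,
# which now carry the int8 qconfig_A8W8 / qconfig_A8W8sym configs renamed above.
exported = torch.export.export_for_training(TinyModel().eval(), example_inputs).module()
quantizer = ComposableQuantizer(get_cadence_default_quantizers())
prepared = prepare_pt2e(exported, quantizer)
prepared(*example_inputs)  # calibration pass with representative data
quantized = convert_pt2e(prepared)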