You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
ifnotenable_int8_compute: # Deprecated. Use this method for reducing model size only.
241
+
ifself.quant_dictisnotNone:
242
+
eps_kernel=torch.quantize_per_tensor(self.eps_kernel.data.normal_(), self.quant_dict[0]['scale'], self.quant_dict[0]['zero_point'], torch.qint8) # Quantize a tensor from normal distribution. 99.7% values will lie within 3 standard deviations, so the original range is set as 6.
ifinput.dtype!=torch.quint8: # check if input has been quantized
255
+
input=torch.quantize_per_tensor(input, self.quant_dict[3]['scale'], self.quant_dict[3]['zero_point'], torch.quint8) # scale=0.1 by grid search; zero_point=128 for uint8 format
ifnotenable_int8_compute: # Deprecated. Use this method for reducing model size only.
494
+
ifself.quant_dictisnotNone:
495
+
eps_kernel=torch.quantize_per_tensor(self.eps_kernel.data.normal_(), self.quant_dict[0]['scale'], self.quant_dict[0]['zero_point'], torch.qint8) # Quantize a tensor from normal distribution. 99.7% values will lie within 3 standard deviations, so the original range is set as 6.
ifinput.dtype!=torch.quint8: # check if input has been quantized
508
+
input=torch.quantize_per_tensor(input, self.quant_dict[3]['scale'], self.quant_dict[3]['zero_point'], torch.quint8) # scale=0.1 by grid search; zero_point=128 for uint8 format
ifnotenable_int8_compute: # Deprecated. Use this method for reducing model size only.
741
+
ifself.quant_dictisnotNone:
742
+
eps_kernel=torch.quantize_per_tensor(self.eps_kernel.data.normal_(), self.quant_dict[0]['scale'], self.quant_dict[0]['zero_point'], torch.qint8) # Quantize a tensor from normal distribution. 99.7% values will lie within 3 standard deviations, so the original range is set as 6.
ifinput.dtype!=torch.quint8: # check if input has been quantized
755
+
input=torch.quantize_per_tensor(input, self.quant_dict[3]['scale'], self.quant_dict[3]['zero_point'], torch.quint8) # scale=0.1 by grid search; zero_point=128 for uint8 format
0 commit comments