diff --git a/tools/quantize/quantize-ort.py b/tools/quantize/quantize-ort.py index 01fdcfd3..aba57f71 100644 --- a/tools/quantize/quantize-ort.py +++ b/tools/quantize/quantize-ort.py @@ -12,7 +12,7 @@ import onnx from onnx import version_converter import onnxruntime -from onnxruntime.quantization import quantize_static, CalibrationDataReader, QuantType, QuantFormat +from onnxruntime.quantization import quantize_static, CalibrationDataReader, QuantType, QuantFormat, quant_pre_process from transform import Compose, Resize, CenterCrop, Normalize, ColorConvert, HandAlign @@ -76,6 +76,7 @@ def check_opset(self): def run(self): print('Quantizing {}: act_type {}, wt_type {}'.format(self.model_path, self.act_type, self.wt_type)) new_model_path = self.check_opset() + quant_pre_process(new_model_path, new_model_path) output_name = '{}_{}.onnx'.format(self.model_path[:-5], self.wt_type) quantize_static(new_model_path, output_name, self.dr, quant_format=QuantFormat.QOperator, # start from onnxruntime==1.11.0, quant_format is set to QuantFormat.QDQ by default, which performs fake quantization