Introduce Preprocessing for Optimized Quantization in quantize-ort.py (#238)

Tim-Siu · web-flow · commit bdd60c27da00 · 2024-02-27T14:18:54.000+08:00
* Add preprocessing with optimization before quantization

* Clean up unintended changes

---------

Co-authored-by: Tim-Siu <shuyao@u.nus.edu>
diff --git a/tools/quantize/quantize-ort.py b/tools/quantize/quantize-ort.py
@@ -12,7 +12,7 @@
 import onnx
 from onnx import version_converter
 import onnxruntime
-from onnxruntime.quantization import quantize_static, CalibrationDataReader, QuantType, QuantFormat
+from onnxruntime.quantization import quantize_static, CalibrationDataReader, QuantType, QuantFormat, quant_pre_process
 
 from transform import Compose, Resize, CenterCrop, Normalize, ColorConvert, HandAlign
 
@@ -76,6 +76,7 @@ def check_opset(self):
     def run(self):
         print('Quantizing {}: act_type {}, wt_type {}'.format(self.model_path, self.act_type, self.wt_type))
         new_model_path = self.check_opset()
+        quant_pre_process(new_model_path, new_model_path)
         output_name = '{}_{}.onnx'.format(self.model_path[:-5], self.wt_type)
         quantize_static(new_model_path, output_name, self.dr,
                         quant_format=QuantFormat.QOperator, # start from onnxruntime==1.11.0, quant_format is set to QuantFormat.QDQ by default, which performs fake quantization