Feat: onnx quantization (#182)

benjats07 · web-flow · commit 351dbcfd3c05 · 2023-09-01T20:34:52.000-07:00
This PR adds the "yolox_quantized" model as an alternative detection model,
offering speed similar to that of detectron2_onnx.
It also adds the "detectron2_quantized" model, a quantized version of
"detectron2_onnx".
Both quantized models are generated on first use.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 0.5.20
+
+* Adds YoloX quantized model
+
 ## 0.5.19
 
 * Add functionality to supplement detected layout with elements from the full page OCR
diff --git a/requirements/base.in b/requirements/base.in
@@ -2,6 +2,7 @@ layoutparser[layoutmodels,tesseract]
 python-multipart
 huggingface-hub
 opencv-python!=4.7.0.68
+onnx==1.14.1
 onnxruntime
 # NOTE(alan): Pinned because this is when the most recent module we import appeared
 transformers>=4.25.1
diff --git a/requirements/base.txt b/requirements/base.txt
@@ -67,6 +67,7 @@ numpy==1.24.4
     #   contourpy
     #   layoutparser
     #   matplotlib
+    #   onnx
     #   onnxruntime
     #   opencv-python
     #   pandas
@@ -76,6 +77,8 @@ numpy==1.24.4
     #   transformers
 omegaconf==2.3.0
     # via effdet
+onnx==1.14.1
+    # via -r requirements/base.in
 onnxruntime==1.15.1
     # via -r requirements/base.in
 opencv-python==4.8.0.76
@@ -108,7 +111,9 @@ pillow==10.0.0
 portalocker==2.7.0
     # via iopath
 protobuf==4.24.2
-    # via onnxruntime
+    # via
+    #   onnx
+    #   onnxruntime
 pycocotools==2.0.7
     # via effdet
 pycparser==2.21
@@ -180,6 +185,7 @@ typing-extensions==4.7.1
     #   filelock
     #   huggingface-hub
     #   iopath
+    #   onnx
     #   torch
 tzdata==2023.3
     # via pandas
diff --git a/unstructured_inference/__version__.py b/unstructured_inference/__version__.py
@@ -1 +1 @@
-__version__ = "0.5.19"  # pragma: no cover
+__version__ = "0.5.20"  # pragma: no cover
diff --git a/unstructured_inference/models/detectron2onnx.py b/unstructured_inference/models/detectron2onnx.py
@@ -1,10 +1,12 @@
-from pathlib import Path
+import os
 from typing import Dict, Final, List, Optional, Union
 
 import cv2
 import numpy as np
 import onnxruntime
 from huggingface_hub import hf_hub_download
+from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
+from onnxruntime.quantization import QuantType, quantize_dynamic
 from PIL import Image
 
 from unstructured_inference.inference.layoutelement import LayoutElement
@@ -27,7 +29,7 @@
 
 # NOTE(alan): Entries are implemented as LazyDicts so that models aren't downloaded until they are
 # needed.
-MODEL_TYPES: Dict[Optional[str], LazyDict] = {
+MODEL_TYPES: Dict[Optional[str], Union[LazyDict, dict]] = {
     "detectron2_onnx": LazyDict(
         model_path=LazyEvaluateInfo(
             hf_hub_download,
@@ -37,6 +39,15 @@
         label_map=DEFAULT_LABEL_MAP,
         confidence_threshold=0.8,
     ),
+    "detectron2_quantized": {
+        "model_path": os.path.join(
+            HUGGINGFACE_HUB_CACHE,
+            "detectron2_quantized",
+            "detectrin2_quantized.onnx",
+        ),
+        "label_map": DEFAULT_LABEL_MAP,
+        "confidence_threshold": 0.8,
+    },
     "detectron2_mask_rcnn": LazyDict(
         model_path=LazyEvaluateInfo(
             hf_hub_download,
@@ -80,13 +91,17 @@ def predict(self, image: Image.Image) -> List[LayoutElement]:
 
     def initialize(
         self,
-        model_path: Union[str, Path],
+        model_path: str,
         label_map: Dict[int, str],
         confidence_threshold: Optional[float] = None,
     ):
         """Loads the detectron2 model using the specified parameters"""
-        logger.info("Loading the Detectron2 layout model ...")
-        self.model_path = str(model_path)
+        if not os.path.exists(model_path) and "detectron2_quantized" in model_path:
+            logger.info("Quantized model don't currently exists, quantizing now...")
+            os.mkdir("".join(os.path.split(model_path)[:-1]))
+            source_path = MODEL_TYPES["detectron2_onnx"]["model_path"]
+            quantize_dynamic(source_path, model_path, weight_type=QuantType.QUInt8)
+
         self.model = onnxruntime.InferenceSession(
             model_path,
             providers=[
@@ -95,6 +110,7 @@ def initialize(
                 "CPUExecutionProvider",
             ],
         )
+        self.model_path = model_path
         self.label_map = label_map
         if confidence_threshold is None:
             confidence_threshold = 0.5
diff --git a/unstructured_inference/models/yolox.py b/unstructured_inference/models/yolox.py
@@ -3,15 +3,19 @@
 # https://github.com/Megvii-BaseDetection/YOLOX/blob/237e943ac64aa32eb32f875faa93ebb18512d41d/yolox/data/data_augment.py
 # https://github.com/Megvii-BaseDetection/YOLOX/blob/ac379df3c97d1835ebd319afad0c031c36d03f36/yolox/utils/demo_utils.py
 
+import os
 from typing import List
 
 import cv2
 import numpy as np
 import onnxruntime
 from huggingface_hub import hf_hub_download
+from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
+from onnxruntime.quantization import QuantType, quantize_dynamic
 from PIL import Image
 
 from unstructured_inference.inference.layoutelement import LayoutElement
+from unstructured_inference.logger import logger
 from unstructured_inference.models.unstructuredmodel import UnstructuredObjectDetectionModel
 from unstructured_inference.utils import LazyDict, LazyEvaluateInfo
 from unstructured_inference.visualize import draw_yolox_bounding_boxes
@@ -47,6 +51,14 @@
         ),
         label_map=YOLOX_LABEL_MAP,
     ),
+    "yolox_quantized": {
+        "model_path": os.path.join(
+            HUGGINGFACE_HUB_CACHE,
+            "yolox_quantized",
+            "yolox_quantized.onnx",
+        ),
+        "label_map": YOLOX_LABEL_MAP,
+    },
 }
 
 
@@ -58,6 +70,15 @@ def predict(self, x: Image):
 
     def initialize(self, model_path: str, label_map: dict):
         """Start inference session for YoloX model."""
+        if not os.path.exists(model_path) and "yolox_quantized" in model_path:
+            logger.info("Quantized model don't currently exists, quantizing now...")
+            model_folder = "".join(os.path.split(model_path)[:-1])
+            if not os.path.exists(model_folder):
+                os.mkdir(model_folder)
+            source_path = MODEL_TYPES["yolox"]["model_path"]
+            quantize_dynamic(source_path, model_path, weight_type=QuantType.QUInt8)
+        self.model_path = model_path
+
         self.model = onnxruntime.InferenceSession(
             model_path,
             providers=[
@@ -66,6 +87,7 @@ def initialize(self, model_path: str, label_map: dict):
                 "CPUExecutionProvider",
             ],
         )
+
         self.layout_classes = label_map
 
     def image_processing(
@@ -106,7 +128,13 @@ def image_processing(
         boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.0
         boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.0
         boxes_xyxy /= ratio
-        dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.45, score_thr=0.1)
+
+        # Note (Benjamin): Distinct models (quantized and original) require distinct
+        # threshold levels
+        if "quantized" in self.model_path:
+            dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.0, score_thr=0.07)
+        else:
+            dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.1, score_thr=0.25)
 
         regions = []
 

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-__version__ = "0.5.19" # pragma: no cover`
	`1`	`+__version__ = "0.5.20" # pragma: no cover`