Skip to content

Commit 351dbcf

Browse files
authored
Feat: onnx quantization (#182)
This PR adds the "yolox_quantized" model as an alternative detection model, with speed similar to detectron2_onnx. It also adds the "detectron2_quantized" model, a quantized version of "detectron2_onnx". Both quantized models are generated on first use.
1 parent 3d2045e commit 351dbcf

File tree

6 files changed

+63
-8
lines changed

6 files changed

+63
-8
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
## 0.5.20
2+
3+
* Adds YoloX quantized model
4+
15
## 0.5.19
26

37
* Add functionality to supplement detected layout with elements from the full page OCR

requirements/base.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ layoutparser[layoutmodels,tesseract]
22
python-multipart
33
huggingface-hub
44
opencv-python!=4.7.0.68
5+
onnx==1.14.1
56
onnxruntime
67
# NOTE(alan): Pinned because this is when the most recent module we import appeared
78
transformers>=4.25.1

requirements/base.txt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ numpy==1.24.4
6767
# contourpy
6868
# layoutparser
6969
# matplotlib
70+
# onnx
7071
# onnxruntime
7172
# opencv-python
7273
# pandas
@@ -76,6 +77,8 @@ numpy==1.24.4
7677
# transformers
7778
omegaconf==2.3.0
7879
# via effdet
80+
onnx==1.14.1
81+
# via -r requirements/base.in
7982
onnxruntime==1.15.1
8083
# via -r requirements/base.in
8184
opencv-python==4.8.0.76
@@ -108,7 +111,9 @@ pillow==10.0.0
108111
portalocker==2.7.0
109112
# via iopath
110113
protobuf==4.24.2
111-
# via onnxruntime
114+
# via
115+
# onnx
116+
# onnxruntime
112117
pycocotools==2.0.7
113118
# via effdet
114119
pycparser==2.21
@@ -180,6 +185,7 @@ typing-extensions==4.7.1
180185
# filelock
181186
# huggingface-hub
182187
# iopath
188+
# onnx
183189
# torch
184190
tzdata==2023.3
185191
# via pandas
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.5.19" # pragma: no cover
1+
__version__ = "0.5.20" # pragma: no cover

unstructured_inference/models/detectron2onnx.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1-
from pathlib import Path
1+
import os
22
from typing import Dict, Final, List, Optional, Union
33

44
import cv2
55
import numpy as np
66
import onnxruntime
77
from huggingface_hub import hf_hub_download
8+
from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
9+
from onnxruntime.quantization import QuantType, quantize_dynamic
810
from PIL import Image
911

1012
from unstructured_inference.inference.layoutelement import LayoutElement
@@ -27,7 +29,7 @@
2729

2830
# NOTE(alan): Entries are implemented as LazyDicts so that models aren't downloaded until they are
2931
# needed.
30-
MODEL_TYPES: Dict[Optional[str], LazyDict] = {
32+
MODEL_TYPES: Dict[Optional[str], Union[LazyDict, dict]] = {
3133
"detectron2_onnx": LazyDict(
3234
model_path=LazyEvaluateInfo(
3335
hf_hub_download,
@@ -37,6 +39,15 @@
3739
label_map=DEFAULT_LABEL_MAP,
3840
confidence_threshold=0.8,
3941
),
42+
"detectron2_quantized": {
43+
"model_path": os.path.join(
44+
HUGGINGFACE_HUB_CACHE,
45+
"detectron2_quantized",
46+
"detectrin2_quantized.onnx",
47+
),
48+
"label_map": DEFAULT_LABEL_MAP,
49+
"confidence_threshold": 0.8,
50+
},
4051
"detectron2_mask_rcnn": LazyDict(
4152
model_path=LazyEvaluateInfo(
4253
hf_hub_download,
@@ -80,13 +91,17 @@ def predict(self, image: Image.Image) -> List[LayoutElement]:
8091

8192
def initialize(
8293
self,
83-
model_path: Union[str, Path],
94+
model_path: str,
8495
label_map: Dict[int, str],
8596
confidence_threshold: Optional[float] = None,
8697
):
8798
"""Loads the detectron2 model using the specified parameters"""
88-
logger.info("Loading the Detectron2 layout model ...")
89-
self.model_path = str(model_path)
99+
if not os.path.exists(model_path) and "detectron2_quantized" in model_path:
100+
logger.info("Quantized model don't currently exists, quantizing now...")
101+
os.mkdir("".join(os.path.split(model_path)[:-1]))
102+
source_path = MODEL_TYPES["detectron2_onnx"]["model_path"]
103+
quantize_dynamic(source_path, model_path, weight_type=QuantType.QUInt8)
104+
90105
self.model = onnxruntime.InferenceSession(
91106
model_path,
92107
providers=[
@@ -95,6 +110,7 @@ def initialize(
95110
"CPUExecutionProvider",
96111
],
97112
)
113+
self.model_path = model_path
98114
self.label_map = label_map
99115
if confidence_threshold is None:
100116
confidence_threshold = 0.5

unstructured_inference/models/yolox.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,19 @@
33
# https://github.com/Megvii-BaseDetection/YOLOX/blob/237e943ac64aa32eb32f875faa93ebb18512d41d/yolox/data/data_augment.py
44
# https://github.com/Megvii-BaseDetection/YOLOX/blob/ac379df3c97d1835ebd319afad0c031c36d03f36/yolox/utils/demo_utils.py
55

6+
import os
67
from typing import List
78

89
import cv2
910
import numpy as np
1011
import onnxruntime
1112
from huggingface_hub import hf_hub_download
13+
from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
14+
from onnxruntime.quantization import QuantType, quantize_dynamic
1215
from PIL import Image
1316

1417
from unstructured_inference.inference.layoutelement import LayoutElement
18+
from unstructured_inference.logger import logger
1519
from unstructured_inference.models.unstructuredmodel import UnstructuredObjectDetectionModel
1620
from unstructured_inference.utils import LazyDict, LazyEvaluateInfo
1721
from unstructured_inference.visualize import draw_yolox_bounding_boxes
@@ -47,6 +51,14 @@
4751
),
4852
label_map=YOLOX_LABEL_MAP,
4953
),
54+
"yolox_quantized": {
55+
"model_path": os.path.join(
56+
HUGGINGFACE_HUB_CACHE,
57+
"yolox_quantized",
58+
"yolox_quantized.onnx",
59+
),
60+
"label_map": YOLOX_LABEL_MAP,
61+
},
5062
}
5163

5264

@@ -58,6 +70,15 @@ def predict(self, x: Image):
5870

5971
def initialize(self, model_path: str, label_map: dict):
6072
"""Start inference session for YoloX model."""
73+
if not os.path.exists(model_path) and "yolox_quantized" in model_path:
74+
logger.info("Quantized model don't currently exists, quantizing now...")
75+
model_folder = "".join(os.path.split(model_path)[:-1])
76+
if not os.path.exists(model_folder):
77+
os.mkdir(model_folder)
78+
source_path = MODEL_TYPES["yolox"]["model_path"]
79+
quantize_dynamic(source_path, model_path, weight_type=QuantType.QUInt8)
80+
self.model_path = model_path
81+
6182
self.model = onnxruntime.InferenceSession(
6283
model_path,
6384
providers=[
@@ -66,6 +87,7 @@ def initialize(self, model_path: str, label_map: dict):
6687
"CPUExecutionProvider",
6788
],
6889
)
90+
6991
self.layout_classes = label_map
7092

7193
def image_processing(
@@ -106,7 +128,13 @@ def image_processing(
106128
boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.0
107129
boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.0
108130
boxes_xyxy /= ratio
109-
dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.45, score_thr=0.1)
131+
132+
# Note (Benjamin): Distinct models (quantized and original) require distinct
133+
# levels of thresholds
134+
if "quantized" in self.model_path:
135+
dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.0, score_thr=0.07)
136+
else:
137+
dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.1, score_thr=0.25)
110138

111139
regions = []
112140

0 commit comments

Comments
 (0)