Skip to content

Commit 2bd64f0

Browse files
author
jinhailiang
committed
feat:ui infer
1 parent 87aeede commit 2bd64f0

File tree

8 files changed

+250
-4
lines changed

8 files changed

+250
-4
lines changed

README.md

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,20 @@
44
![GitHub](https://img.shields.io/github/license/Meituan-Dianping/vision-diff)
55
![GitHub](https://img.shields.io/docker/cloud/build/brighthai/vision-ui)
66

7-
## 什么是Vision UI
7+
# 简介
88

9-
Vision UI是一组图像处理算法,来源于美团视觉测试工具,提供如视觉对比(增量式对比)、图像融合和文本识别。
9+
Vision UI 源于美团视觉测试工具,提供基于图像的UI处理和分析
1010

11-
本项目无需训练模型,基于训练模型的项目在[Vision-ml](https://github.com/Meituan-Dianping/vision)
11+
本项目无需训练模型,提供训练框架的项目在[Vision-ml](https://github.com/Meituan-Dianping/vision)
1212

1313
## 特性
1414

1515
* 超越像素对比-[视觉对比](resources/vision_diff_cn.md)
1616

1717
* 基于模板匹配-[图像融合](resources/vision_merge.md)
1818

19+
* 预训练模型-[UI目标检测](resources/vision_infer.md)
20+
1921
* 集成模型-[文本识别](resources/vision_text.md)
2022

2123

@@ -25,6 +27,12 @@ Vision UI是一组图像处理算法,来源于美团视觉测试工具,提
2527
| ------------------------------ | -------------------------------- | -------------------------------- | ------------------------------------- |
2628
| ![](image/1_0.png) | ![](image/1_1.png) | ![](image/1_2.png) | ![](image/1_merge.png)
2729

30+
31+
# UI目标检测
32+
| App1 | App2 | App3 |
33+
|-------------------------|-------------------------|-------------------------|
34+
| ![](image/infer_01.png) | ![](image/infer_02.png) | ![](image/infer_03.png) |
35+
2836
### 视觉对比
2937

3038
| base | comparison | diff |

image/infer_01.png

409 KB
Loading

image/infer_02.png

365 KB
Loading

image/infer_03.png

410 KB
Loading

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,6 @@ Flask-Cors==3.0.7
55
pillow==7.1.0
66
paddlepaddle==1.8.5
77
gunicorn==20.0.4
8-
onnxruntime==1.4.0
8+
onnxruntime==1.10.0
99
pyclipper==1.2.0
1010
shapely==1.7.1

resources/vision_infer.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# UI目标检测
2+
3+
> Vision-infer
4+
5+
### 简介
6+
在CPU下能快速推理的UI检测模型
7+
8+
9+
### 模型性能
10+
11+
* 基于[YOLOX](https://github.com/Megvii-BaseDetection/YOLOX) 目标检测框架,训练阶段修改了部分超参数,
12+
识别目标为UI中常见的图片和图标,文本可由OCR获得,详见[文本识别](vision_text.md),在开放测试集中平均准确率超过90%
13+
14+
15+
* [ONNX](https://onnx.ai) Optimizer转换,用i7-9750H CPU推理时间105ms,
16+
可转为[TensorRT](https://github.com/onnx/onnx-tensorrt) 用GPU进一步加速推理
17+
18+
### 使用说明
19+
1.下载预训练的UI目标检测模型[ui-det](https://github.com/Meituan-Dianping/vision-ui/releases/download/v0.2/ui_det_v1.onnx) 到指定的目录,
20+
修改vision-ui/service/image_infer.py文件中调试代码部分,替换model_path。
21+
22+
2.运行调试代码,结果文件保存在指定的infer_result_path目录

service/image_infer.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import os.path
2+
import cv2
3+
import numpy as np
4+
import onnxruntime
5+
import time
6+
from service.image_utils import yolox_preprocess, yolox_postprocess, multiclass_nms, img_show
7+
8+
9+
class ImageInfer(object):
    """UI element detector backed by a YOLOX ONNX model.

    Runs the exported model with onnxruntime and detects the classes in
    ``UI_CLASSES`` ("bg", "icon", "pic") on a screenshot; detections can
    then be rendered back onto the image with :meth:`show_infer`.
    """

    def __init__(self, model_path):
        # Class labels the model was trained on, indexed by class id.
        self.UI_CLASSES = ("bg", "icon", "pic")
        self.input_shape = [640, 640]
        self.cls_thresh = 0.5   # minimum class score for a box to survive
        self.nms_thresh = 0.2   # IoU threshold used during NMS
        self.model_path = model_path
        self.model_session = onnxruntime.InferenceSession(self.model_path)

    def ui_infer(self, image_path):
        """Detect UI elements on the image at *image_path*.

        Returns an (N, 6) array of [x1, y1, x2, y2, score, class_id]
        rows, or None when nothing passes the score threshold.
        """
        origin_img = cv2.imread(image_path)
        img, ratio = yolox_preprocess(origin_img, self.input_shape)
        session_inputs = {self.model_session.get_inputs()[0].name: img[None, :, :, :]}
        raw_output = self.model_session.run(None, session_inputs)
        predictions = yolox_postprocess(raw_output[0], self.input_shape)[0]
        boxes = predictions[:, :4]
        scores = predictions[:, 4:5] * predictions[:, 5:]
        # Convert (cx, cy, w, h) predictions to corner form (x1, y1, x2, y2).
        half_w = boxes[:, 2] / 2.
        half_h = boxes[:, 3] / 2.
        boxes_xyxy = np.ones_like(boxes)
        boxes_xyxy[:, 0] = boxes[:, 0] - half_w
        boxes_xyxy[:, 1] = boxes[:, 1] - half_h
        boxes_xyxy[:, 2] = boxes[:, 0] + half_w
        boxes_xyxy[:, 3] = boxes[:, 1] + half_h
        # Undo the letterbox scaling applied in preprocessing.
        boxes_xyxy /= ratio
        return multiclass_nms(boxes_xyxy, scores,
                              nms_thr=self.nms_thresh, score_thr=self.cls_thresh)

    def show_infer(self, dets, origin_img, infer_result_path):
        """Draw *dets* onto *origin_img* and write the result to *infer_result_path*."""
        if dets is not None:
            boxes, scores, cls_inds = dets[:, :4], dets[:, 4], dets[:, 5]
            origin_img = img_show(origin_img, boxes, scores, cls_inds,
                                  conf=self.cls_thresh, class_names=self.UI_CLASSES)
        cv2.imwrite(infer_result_path, origin_img)
41+
42+
43+
if __name__ == '__main__':
    # Debug driver: run UI detection on a local screenshot and save an
    # annotated copy into infer_result_path.
    image_path = "../capture/local_images/01.png"
    model_path = "../capture/local_models/ui_det_v1.onnx"
    infer_result_path = "../capture/local_images"
    assert os.path.exists(image_path)
    assert os.path.exists(model_path)
    if not os.path.exists(infer_result_path):
        os.mkdir(infer_result_path)
    image_infer = ImageInfer(model_path)
    started = time.time()
    dets = image_infer.ui_infer(image_path)
    print(f"Infer time: {round(time.time() - started, 3)}s")
    # Fractional part of the timestamp gives a quasi-unique file name.
    stamp = str(time.time()).split('.')[-1][:4]
    infer_result_name = f"infer_{stamp}.png"
    image_infer.show_infer(dets, cv2.imread(image_path),
                           os.path.join(infer_result_path, infer_result_name))

service/image_utils.py

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,160 @@ def get_label_pos(contour):
7878
def draw_contours(img, contours, color="info"):
7979
if color == "info":
8080
cv2.drawContours(img, contours, -1, (255, 145, 30), 3)
81+
82+
83+
def yolox_preprocess(img, input_size, swap=(2, 0, 1)):
    """Letterbox *img* to *input_size* for YOLOX inference.

    The image is resized with its aspect ratio preserved, pasted onto a
    canvas filled with the pad value 114, transposed by *swap* (HWC ->
    CHW by default) and converted to contiguous float32.

    Returns (padded_img, r) where r is the scale factor that was applied.
    """
    if img.ndim == 3:
        canvas = numpy.full((input_size[0], input_size[1], 3), 114, dtype=numpy.uint8)
    else:
        canvas = numpy.full(tuple(input_size), 114, dtype=numpy.uint8)
    r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
    new_h = int(img.shape[0] * r)
    new_w = int(img.shape[1] * r)
    resized = cv2.resize(img, (new_w, new_h),
                         interpolation=cv2.INTER_LINEAR).astype(numpy.uint8)
    # Image content sits in the top-left corner; the rest stays padding.
    canvas[:new_h, :new_w] = resized
    return numpy.ascontiguousarray(canvas.transpose(swap), dtype=numpy.float32), r
98+
99+
100+
def yolox_postprocess(outputs, img_size, p6=False):
    """Decode raw YOLOX head outputs into pixel-space predictions.

    outputs is (batch, n_anchors, 5 + n_classes): the first two channels
    are cell-relative centres and the next two are log-encoded sizes;
    both are decoded in place to pixel units. img_size is the (h, w) the
    network saw; p6 adds the extra stride-64 pyramid level.

    Returns the same *outputs* array, modified in place.
    """
    strides = [8, 16, 32, 64] if p6 else [8, 16, 32]
    grids = []
    expanded_strides = []
    for stride in strides:
        hsize = img_size[0] // stride
        wsize = img_size[1] // stride
        xv, yv = numpy.meshgrid(numpy.arange(wsize), numpy.arange(hsize))
        grid = numpy.stack((xv, yv), 2).reshape(1, -1, 2)
        grids.append(grid)
        expanded_strides.append(numpy.full((*grid.shape[:2], 1), stride))
    all_grids = numpy.concatenate(grids, 1)
    all_strides = numpy.concatenate(expanded_strides, 1)
    # Centres are offsets from their grid cell; sizes are exponentiated.
    outputs[..., :2] = (outputs[..., :2] + all_grids) * all_strides
    outputs[..., 2:4] = numpy.exp(outputs[..., 2:4]) * all_strides
    return outputs
120+
121+
122+
def nms(boxes, scores, nms_thr):
    """Greedy single-class non-maximum suppression in pure NumPy.

    boxes is (N, 4) in corner form, scores is (N,). Boxes whose IoU with
    an already-kept higher-scoring box exceeds *nms_thr* are discarded.

    Returns the indices of the kept boxes, highest score first.
    """
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    # The +1 treats coordinates as inclusive pixel indices.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        best = order[0]
        keep.append(best)
        rest = order[1:]
        # Intersection of the best box with every remaining candidate.
        iw = numpy.maximum(
            0.0, numpy.minimum(x2[best], x2[rest]) - numpy.maximum(x1[best], x1[rest]) + 1)
        ih = numpy.maximum(
            0.0, numpy.minimum(y2[best], y2[rest]) - numpy.maximum(y1[best], y1[rest]) + 1)
        inter = iw * ih
        iou = inter / (areas[best] + areas[rest] - inter)
        order = rest[numpy.where(iou <= nms_thr)[0]]

    return keep
150+
151+
152+
def multiclass_nms(boxes, scores, nms_thr, score_thr, class_agnostic=True):
    """Multiclass NMS front-end; dispatches on *class_agnostic*."""
    if class_agnostic:
        return multiclass_nms_class_agnostic(boxes, scores, nms_thr, score_thr)
    return multiclass_nms_class_aware(boxes, scores, nms_thr, score_thr)
159+
160+
161+
def multiclass_nms_class_agnostic(boxes, scores, nms_thr, score_thr):
    """Multiclass NMS, class-agnostic flavor.

    Each box keeps only its best-scoring class; boxes under *score_thr*
    are dropped, then one NMS pass runs over all survivors regardless of
    class. Returns (N, 6) [x1, y1, x2, y2, score, class_id] or None.
    """
    cls_inds = scores.argmax(1)
    cls_scores = scores[numpy.arange(len(cls_inds)), cls_inds]

    valid_mask = cls_scores > score_thr
    if not valid_mask.any():
        return None
    kept_scores = cls_scores[valid_mask]
    kept_boxes = boxes[valid_mask]
    kept_cls = cls_inds[valid_mask]
    keep = nms(kept_boxes, kept_scores, nms_thr)
    if not keep:
        return None
    return numpy.concatenate(
        [kept_boxes[keep], kept_scores[keep, None], kept_cls[keep, None]], 1)
178+
179+
180+
def multiclass_nms_class_aware(boxes, scores, nms_thr, score_thr):
    """Multiclass NMS, class-aware flavor: suppression runs per class.

    Returns (N, 6) [x1, y1, x2, y2, score, class_id] rows concatenated
    over all classes, or None when nothing survives *score_thr*.
    """
    final_dets = []
    for cls_ind in range(scores.shape[1]):
        cls_scores = scores[:, cls_ind]
        valid_mask = cls_scores > score_thr
        if not valid_mask.any():
            continue
        kept_scores = cls_scores[valid_mask]
        kept_boxes = boxes[valid_mask]
        keep = nms(kept_boxes, kept_scores, nms_thr)
        if len(keep) > 0:
            labels = numpy.ones((len(keep), 1)) * cls_ind
            final_dets.append(numpy.concatenate(
                [kept_boxes[keep], kept_scores[keep, None], labels], 1))
    if not final_dets:
        return None
    return numpy.concatenate(final_dets, 0)
202+
203+
204+
def img_show(img, boxes, scores, cls_ids, conf=0.5, class_names=None):
    """Draw detection boxes with class/score labels onto *img*.

    Boxes scoring below *conf* are skipped. Drawing happens in place;
    the (annotated) image is also returned.
    """
    # One color row per class, indexed by class id.
    _COLORS = numpy.array([255, 0, 0,
                           195, 123, 40,
                           110, 176, 23]).astype(numpy.float32).reshape(-1, 3)
    font = cv2.FONT_HERSHEY_SIMPLEX
    for i, box in enumerate(boxes):
        score = scores[i]
        if score < conf:
            continue
        cls_id = int(cls_ids[i])
        x0 = int(box[0])
        y0 = int(box[1])
        x1 = int(box[2])
        y1 = int(box[3])

        color = _COLORS[cls_id].astype(numpy.uint8).tolist()
        text = '{}:{:.1f}%'.format(class_names[cls_id], score * 100)
        # Black text on light class colors, white on dark ones.
        txt_color = (0, 0, 0) if numpy.mean(_COLORS[cls_id]) > 128 else (255, 255, 255)

        txt_size = cv2.getTextSize(text, font, 0.4, 1)[0]
        cv2.rectangle(img, (x0, y0), (x1, y1), color, 3)

        # Filled, darkened backdrop behind the label for readability.
        txt_bk_color = (_COLORS[cls_id] * 0.7).astype(numpy.uint8).tolist()
        cv2.rectangle(img,
                      (x0, y0 + 1),
                      (x0 + txt_size[0] + 1, y0 + int(1.5 * txt_size[1])),
                      txt_bk_color,
                      -1)
        cv2.putText(img, text, (x0, y0 + txt_size[1]), font, 0.4, txt_color, thickness=1)

    return img

0 commit comments

Comments
 (0)