Commit d9a1fa1
Author: Anna Grebneva
Parent: 49006cc

Added NanoDet models (#3406)

File tree: 22 files changed, +1175 -8 lines changed

demos/common/python/openvino/model_zoo/model_api/README.md

Lines changed: 1 addition & 1 deletion
@@ -66,7 +66,7 @@ The following tasks can be solved with wrappers usage:
 | Instance Segmentation | <ul><li>`MaskRCNNModel`</li><li>`YolactModel`</li></ul> |
 | Monocular Depth Estimation | <ul><li>`MonoDepthModel`</li></ul> |
 | Named Entity Recognition | <ul><li>`BertNamedEntityRecognition`</li></ul> |
-| Object Detection | <ul><li>`CenterNet`</li><li>`DETR`</li><li>`CTPN`</li><li>`FaceBoxes`</li><li>`RetinaFace`</li><li>`RetinaFacePyTorch`</li><li>`SSD`</li><li>`UltraLightweightFaceDetection`</li><li>`YOLO`</li><li>`YoloV3ONNX`</li><li>`YoloV4`</li><li>`YOLOF`</li><li>`YOLOX`</li></ul> |
+| Object Detection | <ul><li>`CenterNet`</li><li>`DETR`</li><li>`CTPN`</li><li>`FaceBoxes`</li><li>`NanoDet`</li><li>`NanoDetPlus`</li><li>`RetinaFace`</li><li>`RetinaFacePyTorch`</li><li>`SSD`</li><li>`UltraLightweightFaceDetection`</li><li>`YOLO`</li><li>`YoloV3ONNX`</li><li>`YoloV4`</li><li>`YOLOF`</li><li>`YOLOX`</li></ul> |
 | Question Answering | <ul><li>`BertQuestionAnswering`</li></ul> |
 | Salient Object Detection | <ul><li>`SalientObjectDetectionModel`</li></ul> |
 | Semantic Segmentation | <ul><li>`SegmentationModel`</li></ul> |

demos/common/python/openvino/model_zoo/model_api/models/__init__.py

Lines changed: 3 additions & 0 deletions
@@ -29,6 +29,7 @@
 from .instance_segmentation import MaskRCNNModel, YolactModel
 from .model import Model
 from .monodepth import MonoDepthModel
+from .nanodet import NanoDet, NanoDetPlus
 from .open_pose import OpenPose
 from .retinaface import RetinaFace, RetinaFacePyTorch
 from .segmentation import SegmentationModel, SalientObjectDetectionModel
@@ -56,6 +57,8 @@
     'MaskRCNNModel',
     'Model',
     'MonoDepthModel',
+    'NanoDet',
+    'NanoDetPlus',
     'OpenPose',
     'OutputTransform',
     'PortraitBackgroundMatting',

demos/common/python/openvino/model_zoo/model_api/models/image_model.py

Lines changed: 1 addition & 0 deletions
@@ -140,6 +140,7 @@ def preprocess(self, inputs):
         meta.update({'resized_shape': resized_image.shape})
         if self.resize_type == 'fit_to_window':
             resized_image = pad_image(resized_image, (self.w, self.h))
+            meta.update({'padded_shape': resized_image.shape})
         resized_image = self.input_transform(resized_image)
         resized_image = self._change_layout(resized_image)
         dict_inputs = {self.image_blob_name: resized_image}
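
Note: the `padded_shape` recorded here is what the new NanoDet wrapper reads back in its `_parse_outputs` (below) to recover the actual network input size when `fit_to_window` resizing pads the image.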

demos/common/python/openvino/model_zoo/model_api/models/nanodet.py

Lines changed: 130 additions & 0 deletions

@@ -0,0 +1,130 @@
+"""
+ Copyright (c) 2022 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import math
+import numpy as np
+
+from .types import NumericalValue
+from .detection_model import DetectionModel
+from .utils import Detection, softmax, nms, clip_detections
+
+
+class NanoDet(DetectionModel):
+    __model__ = 'NanoDet'
+
+    def __init__(self, model_adapter, configuration=None, preload=False):
+        super().__init__(model_adapter, configuration, preload)
+        self._check_io_number(1, 1)
+        self.output_blob_name = self._get_outputs()
+        self.reg_max = 7
+        self.strides = [8, 16, 32]
+        self.ad = 0.5
+
+    def _get_outputs(self):
+        output_blob_name = next(iter(self.outputs))
+        output_size = self.outputs[output_blob_name].shape
+        if len(output_size) != 3:
+            self.raise_error("Unexpected output blob shape {}. Only 3D output blob is supported".format(output_size))
+
+        return output_blob_name
+
+    @classmethod
+    def parameters(cls):
+        parameters = super().parameters()
+        parameters['resize_type'].update_default_value('fit_to_window')
+        parameters['confidence_threshold'].update_default_value(0.5)
+        parameters.update({
+            'iou_threshold': NumericalValue(default_value=0.6, description="Threshold for NMS filtering"),
+            'num_classes': NumericalValue(default_value=80, value_type=int, description="Number of classes")
+        })
+        return parameters
+
+    def postprocess(self, outputs, meta):
+        detections = self._parse_outputs(outputs, meta)
+        detections = self.rescale_detections(detections, meta)
+        return detections
+
+    def _parse_outputs(self, outputs, meta):
+        output = outputs[self.output_blob_name][0]
+
+        cls_scores = output[:, :self.num_classes]
+        bbox_preds = output[:, self.num_classes:]
+        input_height, input_width = meta['padded_shape'][:2] if meta.get('padded_shape') else meta['resized_shape'][:2]
+
+        bboxes = self.get_bboxes(bbox_preds, input_height, input_width)
+        dets = []
+        for label, score in enumerate(np.transpose(cls_scores)):
+            mask = score > self.confidence_threshold
+            filtered_boxes, score = bboxes[mask, :], score[mask]
+            if score.size == 0:
+                continue
+            x_mins, y_mins, x_maxs, y_maxs = filtered_boxes.T
+            keep = nms(x_mins, y_mins, x_maxs, y_maxs, score, self.iou_threshold, include_boundaries=True)
+            score = score[keep]
+            x_mins, y_mins, x_maxs, y_maxs = x_mins[keep], y_mins[keep], x_maxs[keep], y_maxs[keep]
+            labels = np.full_like(score, label, dtype=int)
+            dets += [Detection(*det) for det in zip(x_mins, y_mins, x_maxs, y_maxs, score, labels)]
+        return dets
+
+    @staticmethod
+    def distance2bbox(points, distance, max_shape):
+        x1 = np.expand_dims(points[:, 0] - distance[:, 0], -1).clip(0, max_shape[1])
+        y1 = np.expand_dims(points[:, 1] - distance[:, 1], -1).clip(0, max_shape[0])
+        x2 = np.expand_dims(points[:, 0] + distance[:, 2], -1).clip(0, max_shape[1])
+        y2 = np.expand_dims(points[:, 1] + distance[:, 3], -1).clip(0, max_shape[0])
+        return np.concatenate((x1, y1, x2, y2), axis=-1)
+
+    def get_single_level_center_point(self, featmap_size, stride):
+        h, w = featmap_size
+        x_range, y_range = (np.arange(w) + self.ad) * stride, (np.arange(h) + self.ad) * stride
+        y, x = np.meshgrid(y_range, x_range, indexing='ij')
+        return y.flatten(), x.flatten()
+
+    def get_bboxes(self, reg_preds, input_height, input_width):
+        featmap_sizes = [(math.ceil(input_height / stride), math.ceil(input_width / stride)) for stride in self.strides]
+        list_center_priors = []
+        for stride, featmap_size in zip(self.strides, featmap_sizes):
+            y, x = self.get_single_level_center_point(featmap_size, stride)
+            strides = np.full_like(x, stride)
+            list_center_priors.append(np.stack([x, y, strides, strides], axis=-1))
+        center_priors = np.concatenate(list_center_priors, axis=0)
+        dist_project = np.linspace(0, self.reg_max, self.reg_max + 1)
+        x = np.dot(softmax(np.reshape(reg_preds, (*reg_preds.shape[:-1], 4, self.reg_max + 1)), -1, True), dist_project)
+        dis_preds = x * np.expand_dims(center_priors[:, 2], -1)
+        return self.distance2bbox(center_priors[:, :2], dis_preds, (input_height, input_width))
+
+    @staticmethod
+    def rescale_detections(detections, meta):
+        input_h, input_w, _ = meta['resized_shape']
+        orig_h, orig_w, _ = meta['original_shape']
+        w = orig_w / input_w
+        h = orig_h / input_h
+
+        for detection in detections:
+            detection.xmin *= w
+            detection.xmax *= w
+            detection.ymin *= h
+            detection.ymax *= h
+
+        return clip_detections(detections, meta['original_shape'])
+
+
+class NanoDetPlus(NanoDet):
+    __model__ = 'NanoDet-Plus'
+
+    def __init__(self, model_adapter, configuration=None, preload=False):
+        super().__init__(model_adapter, configuration, preload)
+        self.ad = 0
+        self.strides = [8, 16, 32, 64]
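
For context, the new wrapper is consumed like any other Model API detector. A minimal synchronous sketch (model and image paths are placeholders; it assumes the Model API's `OpenvinoAdapter`/`create_core` helpers and the standard preprocess, infer_sync, postprocess flow of the `Model` base class):

```python
import cv2

from openvino.model_zoo.model_api.adapters import create_core, OpenvinoAdapter
from openvino.model_zoo.model_api.models import NanoDet

# Placeholder paths: any converted nanodet-m-*.xml and a test image.
adapter = OpenvinoAdapter(create_core(), 'nanodet-m-1.5x-416.xml', device='CPU')
model = NanoDet(adapter, configuration={'num_classes': 80}, preload=True)

image = cv2.imread('input.jpg')
inputs, meta = model.preprocess(image)             # 'fit_to_window' resize, shapes recorded in meta
raw_outputs = model.infer_sync(inputs)             # single 3D output blob
detections = model.postprocess(raw_outputs, meta)  # decode, per-class NMS, rescale to source image

for det in detections:
    print(det.id, det.score, (det.xmin, det.ymin, det.xmax, det.ymax))
```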

demos/common/python/openvino/model_zoo/model_api/models/utils.py

Lines changed: 2 additions & 2 deletions
@@ -208,6 +208,6 @@ def nms(x1, y1, x2, y2, scores, thresh, include_boundaries=False, keep_top_k=None):
     return keep
 
 
-def softmax(logits, axis=None):
+def softmax(logits, axis=None, keepdims=False):
     exp = np.exp(logits)
-    return exp / np.sum(exp, axis=axis)
+    return exp / np.sum(exp, axis=axis, keepdims=keepdims)
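
The new `keepdims` flag is what lets NanoDet's `get_bboxes` softmax over the last axis of a 3D tensor and still broadcast the division. A small shape-checking sketch (random values; 3549 is the prior count a 416x416 input yields with strides 8/16/32):

```python
import numpy as np

def softmax(logits, axis=None, keepdims=False):
    exp = np.exp(logits)
    return exp / np.sum(exp, axis=axis, keepdims=keepdims)

reg_max = 7
num_priors = 3549  # 52*52 + 26*26 + 13*13 center priors for a 416x416 input
reg_preds = np.random.rand(num_priors, 4 * (reg_max + 1))

# keepdims=True keeps the summed axis, so (3549, 4, 8) / (3549, 4, 1) broadcasts;
# without it the division against shape (3549, 4) would raise a ValueError.
probs = softmax(reg_preds.reshape(num_priors, 4, reg_max + 1), -1, True)

# Expected value of each 8-bin distribution = predicted side distance in stride units.
dist_project = np.linspace(0, reg_max, reg_max + 1)
distances = np.dot(probs, dist_project)  # (3549, 4): left, top, right, bottom
```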

demos/object_detection_demo/python/README.md

Lines changed: 13 additions & 3 deletions
@@ -72,6 +72,10 @@ omz_converter --list models.lst
     - detr-resnet50
 * architecture_type = faceboxes
     - faceboxes-pytorch
+* architecture_type = nanodet
+    - nanodet-m-1.5x-416
+* architecture_type = nanodet-plus
+    - nanodet-plus-m-1.5x-416
 * architecture_type = retinaface-pytorch
     - retinaface-resnet50-pytorch
 * architecture_type = ssd
@@ -159,12 +163,13 @@ Running the application with the `-h` option yields the following usage message:
 
 ```
 usage: object_detection_demo.py [-h] -m MODEL -at
-                                {ssd,yolo,yolov3-onnx,yolov4,yolof,yolox,faceboxes,centernet,ctpn,retinaface,ultra_lightweight_face_detection,retinaface-pytorch,detr}
+                                {centernet,detr,ctpn,faceboxes,nanodet,nanodet-plus,retinaface,retinaface-pytorch,ssd,ultra_lightweight_face_detection,yolo,yolov4,yolof,yolox,yolov3-onnx}
                                 -i INPUT [--adapter {openvino,ovms}]
                                 [-d DEVICE] [--labels LABELS] [-t PROB_THRESHOLD]
                                 [--resize_type {standard,fit_to_window,fit_to_window_letterbox}]
                                 [--input_size INPUT_SIZE INPUT_SIZE] [--anchors ANCHORS [ANCHORS ...]]
-                                [--masks MASKS [MASKS ...]] [-nireq NUM_INFER_REQUESTS] [-nstreams NUM_STREAMS]
+                                [--masks MASKS [MASKS ...]] [--layout LAYOUT]
+                                [--num_classes NUM_CLASSES] [-nireq NUM_INFER_REQUESTS] [-nstreams NUM_STREAMS]
                                 [-nthreads NUM_THREADS] [--loop] [-o OUTPUT] [-limit OUTPUT_LIMIT] [--no_show]
                                 [--output_resolution OUTPUT_RESOLUTION] [-u UTILIZATION_MONITORS]
                                 [--reverse_input_channels] [--mean_values MEAN_VALUES MEAN_VALUES MEAN_VALUES]
@@ -175,7 +180,7 @@ Options:
   -m MODEL, --model MODEL
                         Required. Path to an .xml file with a trained model or
                         address of model inference service if using OVMS adapter.
-  -at, --architecture_type  Required. Specify model's architecture type. Valid values are {ssd,yolo,yolov3-onnx,yolov4,yolof,yolox,faceboxes,centernet,ctpn,retinaface,ultra_lightweight_face_detection,retinaface-pytorch,detr}.
+  -at, --architecture_type  Required. Specify model's architecture type. Valid values are {centernet,detr,ctpn,faceboxes,nanodet,nanodet-plus,retinaface,retinaface-pytorch,ssd,ultra_lightweight_face_detection,yolo,yolov4,yolof,yolox,yolov3-onnx}.
   -i INPUT, --input INPUT
                         Required. An input to process. The input must be a
                         single image, a folder of images, video file or camera id.
@@ -206,6 +211,11 @@ Common model options:
   --masks MASKS [MASKS ...]
                         Optional. A space separated list of masks for anchors. By default, the model's default masks are used.
                         Only for YOLOV4 architecture type.
+  --layout LAYOUT       Optional. Model inputs layouts. Ex. NCHW or
+                        input0:NCHW,input1:NC in case of more than one input.
+  --num_classes NUM_CLASSES
+                        Optional. Number of detected classes. Only for NanoDet, NanoDetPlus
+                        architecture types.
 
 Inference options:
   -nireq NUM_INFER_REQUESTS, --num_infer_requests NUM_INFER_REQUESTS
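
For example, a typical run with one of the new models might look like this (model and input paths are placeholders):

```sh
python3 object_detection_demo.py \
    -m <path_to_model>/nanodet-m-1.5x-416.xml \
    -at nanodet \
    -i <path_to_video>/inputVideo.mp4
```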

demos/object_detection_demo/python/models.lst

Lines changed: 4 additions & 0 deletions
@@ -7,6 +7,10 @@ ctpn
 detr-resnet50
 # For --architecture_type=faceboxes
 faceboxes-pytorch
+# For --architecture_type=nanodet
+nanodet-m-1.5x-416
+# For --architecture_type=nanodet-plus
+nanodet-plus-m-1.5x-416
 # For --architecture_type=retinaface-pytorch
 retinaface-resnet50-pytorch
 # For --architecture_type=ssd

demos/object_detection_demo/python/object_detection_demo.py

Lines changed: 8 additions & 2 deletions
@@ -77,6 +77,9 @@ def build_argparser():
     common_model_args.add_argument('--layout', type=str, default=None,
                                    help='Optional. Model inputs layouts. '
                                         'Ex. NCHW or input0:NCHW,input1:NC in case of more than one input.')
+    common_model_args.add_argument('--num_classes', default=None, type=int,
+                                   help='Optional. Number of detected classes. Only for NanoDet, NanoDetPlus '
+                                        'architecture types.')
 
     infer_args = parser.add_argument_group('Inference options')
     infer_args.add_argument('-nireq', '--num_infer_requests', help='Optional. Number of infer requests',
@@ -155,9 +158,11 @@ def print_raw_results(detections, labels, frame_id):
 def main():
     args = build_argparser().parse_args()
     if args.architecture_type != 'yolov4' and args.anchors:
-        log.warning('The "--anchors" options works only for "-at==yolov4". Option will be omitted')
+        log.warning('The "--anchors" option works only for "-at==yolov4". Option will be omitted')
     if args.architecture_type != 'yolov4' and args.masks:
-        log.warning('The "--masks" options works only for "-at==yolov4". Option will be omitted')
+        log.warning('The "--masks" option works only for "-at==yolov4". Option will be omitted')
+    if args.architecture_type not in ['nanodet', 'nanodet-plus'] and args.num_classes:
+        log.warning('The "--num_classes" option works only for "-at==nanodet" and "-at==nanodet-plus". Option will be omitted')
 
     cap = open_images_capture(args.input, args.loop)
 
@@ -176,6 +181,7 @@ def main():
         'path_to_labels': args.labels,
         'confidence_threshold': args.prob_threshold,
         'input_size': args.input_size, # The CTPN specific
+        'num_classes': args.num_classes, # The NanoDet and NanoDetPlus specific
     }
     model = DetectionModel.create_model(args.architecture_type, model_adapter, configuration)
     model.log_layers_info()

demos/tests/cases.py

Lines changed: 18 additions & 0 deletions
@@ -1221,6 +1221,24 @@ def single_option_cases(key, *args):
                           '--scale_values': ['58.395', '57.12', '57.375']}),
         ]
     ),
+    *combine_cases(
+        TestCase(options={'--architecture_type': 'nanodet'}),
+        [
+            TestCase(options={'-m': ModelArg('nanodet-m-1.5x-416')}),
+            TestCase(options={'-m': ModelFileArg('nanodet-m-1.5x-416', 'nanodet-m-1.5x-416.onnx'),
+                              '--mean_values': ['103.53', '116.28', '123.675'],
+                              '--scale_values': ['57.375', '57.12', '58.395']}),
+        ]
+    ),
+    *combine_cases(
+        TestCase(options={'--architecture_type': 'nanodet-plus'}),
+        [
+            TestCase(options={'-m': ModelArg('nanodet-plus-m-1.5x-416')}),
+            TestCase(options={'-m': ModelFileArg('nanodet-plus-m-1.5x-416', 'nanodet-plus-m-1.5x-416.onnx'),
+                              '--mean_values': ['103.53', '116.28', '123.675'],
+                              '--scale_values': ['57.375', '57.12', '58.395']}),
+        ]
+    ),
 ],
 )),
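
(The `--mean_values`/`--scale_values` in the ONNX cases are the standard ImageNet normalization statistics in BGR channel order; they are passed explicitly here presumably because the raw ONNX model, unlike the converted IR, does not have normalization baked in.)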

models/public/device_support.md

Lines changed: 2 additions & 0 deletions
@@ -88,6 +88,8 @@
 | mtcnn-o | YES | YES | |
 | mtcnn-p | YES | YES | |
 | mtcnn-r | YES | YES | |
+| nanodet-m-1.5x-416 | YES | YES | |
+| nanodet-plus-m-1.5x-416 | YES | YES | |
 | netvlad-tf | YES | | |
 | nfnet-f0 | YES | YES | YES |
 | ocrnet-hrnet-w48-paddle | YES | YES | |
