Commit d9a1fa1
Author: Anna Grebneva
Parent: 49006cc

Added NanoDet models (#3406)

File tree: 22 files changed, +1175 -8 lines changed

demos/common/python/openvino/model_zoo/model_api/README.md

Lines changed: 1 addition & 1 deletion
@@ -66,7 +66,7 @@ The following tasks can be solved with wrappers usage:
 | Instance Segmentation | <ul><li>`MaskRCNNModel`</li><li>`YolactModel`</li></ul> |
 | Monocular Depth Estimation | <ul><li>`MonoDepthModel`</li></ul> |
 | Named Entity Recognition | <ul><li>`BertNamedEntityRecognition`</li></ul> |
-| Object Detection | <ul><li>`CenterNet`</li><li>`DETR`</li><li>`CTPN`</li><li>`FaceBoxes`</li><li>`RetinaFace`</li><li>`RetinaFacePyTorch`</li><li>`SSD`</li><li>`UltraLightweightFaceDetection`</li><li>`YOLO`</li><li>`YoloV3ONNX`</li><li>`YoloV4`</li><li>`YOLOF`</li><li>`YOLOX`</li></ul> |
+| Object Detection | <ul><li>`CenterNet`</li><li>`DETR`</li><li>`CTPN`</li><li>`FaceBoxes`</li><li>`NanoDet`</li><li>`NanoDetPlus`</li><li>`RetinaFace`</li><li>`RetinaFacePyTorch`</li><li>`SSD`</li><li>`UltraLightweightFaceDetection`</li><li>`YOLO`</li><li>`YoloV3ONNX`</li><li>`YoloV4`</li><li>`YOLOF`</li><li>`YOLOX`</li></ul> |
 | Question Answering | <ul><li>`BertQuestionAnswering`</li></ul> |
 | Salient Object Detection | <ul><li>`SalientObjectDetectionModel`</li></ul> |
 | Semantic Segmentation | <ul><li>`SegmentationModel`</li></ul> |

demos/common/python/openvino/model_zoo/model_api/models/__init__.py

Lines changed: 3 additions & 0 deletions
@@ -29,6 +29,7 @@
 from .instance_segmentation import MaskRCNNModel, YolactModel
 from .model import Model
 from .monodepth import MonoDepthModel
+from .nanodet import NanoDet, NanoDetPlus
 from .open_pose import OpenPose
 from .retinaface import RetinaFace, RetinaFacePyTorch
 from .segmentation import SegmentationModel, SalientObjectDetectionModel
@@ -56,6 +57,8 @@
     'MaskRCNNModel',
     'Model',
     'MonoDepthModel',
+    'NanoDet',
+    'NanoDetPlus',
     'OpenPose',
     'OutputTransform',
     'PortraitBackgroundMatting',

demos/common/python/openvino/model_zoo/model_api/models/image_model.py

Lines changed: 1 addition & 0 deletions
@@ -140,6 +140,7 @@ def preprocess(self, inputs):
         meta.update({'resized_shape': resized_image.shape})
         if self.resize_type == 'fit_to_window':
             resized_image = pad_image(resized_image, (self.w, self.h))
+            meta.update({'padded_shape': resized_image.shape})
         resized_image = self.input_transform(resized_image)
         resized_image = self._change_layout(resized_image)
         dict_inputs = {self.image_blob_name: resized_image}
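
Note: the `padded_shape` recorded here is what the new NanoDet wrapper reads back in its `_parse_outputs` (below) to recover the actual network input size when `fit_to_window` resizing pads the image.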

demos/common/python/openvino/model_zoo/model_api/models/nanodet.py

Lines changed: 130 additions & 0 deletions

@@ -0,0 +1,130 @@
+"""
+ Copyright (c) 2022 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import math
+import numpy as np
+
+from .types import NumericalValue
+from .detection_model import DetectionModel
+from .utils import Detection, softmax, nms, clip_detections
+
+
+class NanoDet(DetectionModel):
+    __model__ = 'NanoDet'
+
+    def __init__(self, model_adapter, configuration=None, preload=False):
+        super().__init__(model_adapter, configuration, preload)
+        self._check_io_number(1, 1)
+        self.output_blob_name = self._get_outputs()
+        self.reg_max = 7
+        self.strides = [8, 16, 32]
+        self.ad = 0.5
+
+    def _get_outputs(self):
+        output_blob_name = next(iter(self.outputs))
+        output_size = self.outputs[output_blob_name].shape
+        if len(output_size) != 3:
+            self.raise_error("Unexpected output blob shape {}. Only 3D output blob is supported".format(output_size))
+
+        return output_blob_name
+
+    @classmethod
+    def parameters(cls):
+        parameters = super().parameters()
+        parameters['resize_type'].update_default_value('fit_to_window')
+        parameters['confidence_threshold'].update_default_value(0.5)
+        parameters.update({
+            'iou_threshold': NumericalValue(default_value=0.6, description="Threshold for NMS filtering"),
+            'num_classes': NumericalValue(default_value=80, value_type=int, description="Number of classes")
+        })
+        return parameters
+
+    def postprocess(self, outputs, meta):
+        detections = self._parse_outputs(outputs, meta)
+        detections = self.rescale_detections(detections, meta)
+        return detections
+
+    def _parse_outputs(self, outputs, meta):
+        output = outputs[self.output_blob_name][0]
+
+        cls_scores = output[:, :self.num_classes]
+        bbox_preds = output[:, self.num_classes:]
+        input_height, input_width = meta['padded_shape'][:2] if meta.get('padded_shape') else meta['resized_shape'][:2]
+
+        bboxes = self.get_bboxes(bbox_preds, input_height, input_width)
+        dets = []
+        for label, score in enumerate(np.transpose(cls_scores)):
+            mask = score > self.confidence_threshold
+            filtered_boxes, score = bboxes[mask, :], score[mask]
+            if score.size == 0:
+                continue
+            x_mins, y_mins, x_maxs, y_maxs = filtered_boxes.T
+            keep = nms(x_mins, y_mins, x_maxs, y_maxs, score, self.iou_threshold, include_boundaries=True)
+            score = score[keep]
+            x_mins, y_mins, x_maxs, y_maxs = x_mins[keep], y_mins[keep], x_maxs[keep], y_maxs[keep]
+            labels = np.full_like(score, label, dtype=int)
+            dets += [Detection(*det) for det in zip(x_mins, y_mins, x_maxs, y_maxs, score, labels)]
+        return dets
+
+    @staticmethod
+    def distance2bbox(points, distance, max_shape):
+        x1 = np.expand_dims(points[:, 0] - distance[:, 0], -1).clip(0, max_shape[1])
+        y1 = np.expand_dims(points[:, 1] - distance[:, 1], -1).clip(0, max_shape[0])
+        x2 = np.expand_dims(points[:, 0] + distance[:, 2], -1).clip(0, max_shape[1])
+        y2 = np.expand_dims(points[:, 1] + distance[:, 3], -1).clip(0, max_shape[0])
+        return np.concatenate((x1, y1, x2, y2), axis=-1)
+
+    def get_single_level_center_point(self, featmap_size, stride):
+        h, w = featmap_size
+        x_range, y_range = (np.arange(w) + self.ad) * stride, (np.arange(h) + self.ad) * stride
+        y, x = np.meshgrid(y_range, x_range, indexing='ij')
+        return y.flatten(), x.flatten()
+
+    def get_bboxes(self, reg_preds, input_height, input_width):
+        featmap_sizes = [(math.ceil(input_height / stride), math.ceil(input_width / stride)) for stride in self.strides]
+        list_center_priors = []
+        for stride, featmap_size in zip(self.strides, featmap_sizes):
+            y, x = self.get_single_level_center_point(featmap_size, stride)
+            strides = np.full_like(x, stride)
+            list_center_priors.append(np.stack([x, y, strides, strides], axis=-1))
+        center_priors = np.concatenate(list_center_priors, axis=0)
+        dist_project = np.linspace(0, self.reg_max, self.reg_max + 1)
+        x = np.dot(softmax(np.reshape(reg_preds, (*reg_preds.shape[:-1], 4, self.reg_max + 1)), -1, True), dist_project)
+        dis_preds = x * np.expand_dims(center_priors[:, 2], -1)
+        return self.distance2bbox(center_priors[:, :2], dis_preds, (input_height, input_width))
+
+    @staticmethod
+    def rescale_detections(detections, meta):
+        input_h, input_w, _ = meta['resized_shape']
+        orig_h, orig_w, _ = meta['original_shape']
+        w = orig_w / input_w
+        h = orig_h / input_h
+
+        for detection in detections:
+            detection.xmin *= w
+            detection.xmax *= w
+            detection.ymin *= h
+            detection.ymax *= h
+
+        return clip_detections(detections, meta['original_shape'])
+
+
+class NanoDetPlus(NanoDet):
+    __model__ = 'NanoDet-Plus'
+
+    def __init__(self, model_adapter, configuration=None, preload=False):
+        super().__init__(model_adapter, configuration, preload)
+        self.ad = 0
+        self.strides = [8, 16, 32, 64]
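
For context, the new wrapper is consumed like any other Model API detector. A minimal synchronous sketch (model and image paths are placeholders; it assumes the Model API's `OpenvinoAdapter`/`create_core` helpers and the standard preprocess, infer_sync, postprocess flow of the `Model` base class):

```python
import cv2

from openvino.model_zoo.model_api.adapters import create_core, OpenvinoAdapter
from openvino.model_zoo.model_api.models import NanoDet

# Placeholder paths: any converted nanodet-m-*.xml and a test image.
adapter = OpenvinoAdapter(create_core(), 'nanodet-m-1.5x-416.xml', device='CPU')
model = NanoDet(adapter, configuration={'num_classes': 80}, preload=True)

image = cv2.imread('input.jpg')
inputs, meta = model.preprocess(image)             # 'fit_to_window' resize, shapes recorded in meta
raw_outputs = model.infer_sync(inputs)             # single 3D output blob
detections = model.postprocess(raw_outputs, meta)  # decode, per-class NMS, rescale to source image

for det in detections:
    print(det.id, det.score, (det.xmin, det.ymin, det.xmax, det.ymax))
```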

demos/common/python/openvino/model_zoo/model_api/models/utils.py

Lines changed: 2 additions & 2 deletions
@@ -208,6 +208,6 @@ def nms(x1, y1, x2, y2, scores, thresh, include_boundaries=False, keep_top_k=None):
     return keep
 
 
-def softmax(logits, axis=None):
+def softmax(logits, axis=None, keepdims=False):
     exp = np.exp(logits)
-    return exp / np.sum(exp, axis=axis)
+    return exp / np.sum(exp, axis=axis, keepdims=keepdims)
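
The new `keepdims` flag is what lets NanoDet's `get_bboxes` softmax over the last axis of a 3D tensor and still broadcast the division. A small shape-checking sketch (random values; 3549 is the prior count a 416x416 input yields with strides 8/16/32):

```python
import numpy as np

def softmax(logits, axis=None, keepdims=False):
    exp = np.exp(logits)
    return exp / np.sum(exp, axis=axis, keepdims=keepdims)

reg_max = 7
num_priors = 3549  # 52*52 + 26*26 + 13*13 center priors for a 416x416 input
reg_preds = np.random.rand(num_priors, 4 * (reg_max + 1))

# keepdims=True keeps the summed axis, so (3549, 4, 8) / (3549, 4, 1) broadcasts;
# without it the division against shape (3549, 4) would raise a ValueError.
probs = softmax(reg_preds.reshape(num_priors, 4, reg_max + 1), -1, True)

# Expected value of each 8-bin distribution = predicted side distance in stride units.
dist_project = np.linspace(0, reg_max, reg_max + 1)
distances = np.dot(probs, dist_project)  # (3549, 4): left, top, right, bottom
```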

demos/object_detection_demo/python/README.md

Lines changed: 13 additions & 3 deletions
@@ -72,6 +72,10 @@ omz_converter --list models.lst
     - detr-resnet50
 * architecture_type = faceboxes
     - faceboxes-pytorch
+* architecture_type = nanodet
+    - nanodet-m-1.5x-416
+* architecture_type = nanodet-plus
+    - nanodet-plus-m-1.5x-416
 * architecture_type = retinaface-pytorch
     - retinaface-resnet50-pytorch
 * architecture_type = ssd
@@ -159,12 +163,13 @@ Running the application with the `-h` option yields the following usage message:
 
 ```
 usage: object_detection_demo.py [-h] -m MODEL -at
-                                {ssd,yolo,yolov3-onnx,yolov4,yolof,yolox,faceboxes,centernet,ctpn,retinaface,ultra_lightweight_face_detection,retinaface-pytorch,detr}
+                                {centernet,detr,ctpn,faceboxes,nanodet,nanodet-plus,retinaface,retinaface-pytorch,ssd,ultra_lightweight_face_detection,yolo,yolov4,yolof,yolox,yolov3-onnx}
                                 -i INPUT [--adapter {openvino,ovms}]
                                 [-d DEVICE] [--labels LABELS] [-t PROB_THRESHOLD]
                                 [--resize_type {standard,fit_to_window,fit_to_window_letterbox}]
                                 [--input_size INPUT_SIZE INPUT_SIZE] [--anchors ANCHORS [ANCHORS ...]]
-                                [--masks MASKS [MASKS ...]] [-nireq NUM_INFER_REQUESTS] [-nstreams NUM_STREAMS]
+                                [--masks MASKS [MASKS ...]] [--layout LAYOUT]
+                                [--num_classes NUM_CLASSES] [-nireq NUM_INFER_REQUESTS] [-nstreams NUM_STREAMS]
                                 [-nthreads NUM_THREADS] [--loop] [-o OUTPUT] [-limit OUTPUT_LIMIT] [--no_show]
                                 [--output_resolution OUTPUT_RESOLUTION] [-u UTILIZATION_MONITORS]
                                 [--reverse_input_channels] [--mean_values MEAN_VALUES MEAN_VALUES MEAN_VALUES]
@@ -175,7 +180,7 @@ Options:
   -m MODEL, --model MODEL
                         Required. Path to an .xml file with a trained model or
                         address of model inference service if using OVMS adapter.
-  -at, --architecture_type  Required. Specify model's architecture type. Valid values are {ssd,yolo,yolov3-onnx,yolov4,yolof,yolox,faceboxes,centernet,ctpn,retinaface,ultra_lightweight_face_detection,retinaface-pytorch,detr}.
+  -at, --architecture_type  Required. Specify model's architecture type. Valid values are {centernet,detr,ctpn,faceboxes,nanodet,nanodet-plus,retinaface,retinaface-pytorch,ssd,ultra_lightweight_face_detection,yolo,yolov4,yolof,yolox,yolov3-onnx}.
   -i INPUT, --input INPUT
                         Required. An input to process. The input must be a
                         single image, a folder of images, video file or camera id.
@@ -206,6 +211,11 @@ Common model options:
   --masks MASKS [MASKS ...]
                         Optional. A space separated list of masks for anchors. By default, the model's default masks are used.
                         Only for YOLOV4 architecture type.
+  --layout LAYOUT       Optional. Model inputs layouts. Ex. NCHW or
+                        input0:NCHW,input1:NC in case of more than one input.
+  --num_classes NUM_CLASSES
+                        Optional. Number of detected classes. Only for NanoDet, NanoDetPlus
+                        architecture types.
 
 Inference options:
   -nireq NUM_INFER_REQUESTS, --num_infer_requests NUM_INFER_REQUESTS
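
For example, a typical run with one of the new models might look like this (model and input paths are placeholders):

```sh
python3 object_detection_demo.py \
    -m <path_to_model>/nanodet-m-1.5x-416.xml \
    -at nanodet \
    -i <path_to_video>/inputVideo.mp4
```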

demos/object_detection_demo/python/models.lst

Lines changed: 4 additions & 0 deletions
@@ -7,6 +7,10 @@ ctpn
 detr-resnet50
 # For --architecture_type=faceboxes
 faceboxes-pytorch
+# For --architecture_type=nanodet
+nanodet-m-1.5x-416
+# For --architecture_type=nanodet-plus
+nanodet-plus-m-1.5x-416
 # For --architecture_type=retinaface-pytorch
 retinaface-resnet50-pytorch
 # For --architecture_type=ssd

demos/object_detection_demo/python/object_detection_demo.py

Lines changed: 8 additions & 2 deletions
@@ -77,6 +77,9 @@ def build_argparser():
     common_model_args.add_argument('--layout', type=str, default=None,
                                    help='Optional. Model inputs layouts. '
                                         'Ex. NCHW or input0:NCHW,input1:NC in case of more than one input.')
+    common_model_args.add_argument('--num_classes', default=None, type=int,
+                                   help='Optional. Number of detected classes. Only for NanoDet, NanoDetPlus '
+                                        'architecture types.')
 
     infer_args = parser.add_argument_group('Inference options')
     infer_args.add_argument('-nireq', '--num_infer_requests', help='Optional. Number of infer requests',
@@ -155,9 +158,11 @@ def print_raw_results(detections, labels, frame_id):
 def main():
     args = build_argparser().parse_args()
     if args.architecture_type != 'yolov4' and args.anchors:
-        log.warning('The "--anchors" options works only for "-at==yolov4". Option will be omitted')
+        log.warning('The "--anchors" option works only for "-at==yolov4". Option will be omitted')
     if args.architecture_type != 'yolov4' and args.masks:
-        log.warning('The "--masks" options works only for "-at==yolov4". Option will be omitted')
+        log.warning('The "--masks" option works only for "-at==yolov4". Option will be omitted')
+    if args.architecture_type not in ['nanodet', 'nanodet-plus'] and args.num_classes:
+        log.warning('The "--num_classes" option works only for "-at==nanodet" and "-at==nanodet-plus". Option will be omitted')
 
     cap = open_images_capture(args.input, args.loop)
 
@@ -176,6 +181,7 @@ def main():
         'path_to_labels': args.labels,
         'confidence_threshold': args.prob_threshold,
         'input_size': args.input_size, # The CTPN specific
+        'num_classes': args.num_classes, # The NanoDet and NanoDetPlus specific
     }
     model = DetectionModel.create_model(args.architecture_type, model_adapter, configuration)
     model.log_layers_info()

demos/tests/cases.py

Lines changed: 18 additions & 0 deletions
@@ -1221,6 +1221,24 @@ def single_option_cases(key, *args):
                           '--scale_values': ['58.395', '57.12', '57.375']}),
         ]
     ),
+    *combine_cases(
+        TestCase(options={'--architecture_type': 'nanodet'}),
+        [
+            TestCase(options={'-m': ModelArg('nanodet-m-1.5x-416')}),
+            TestCase(options={'-m': ModelFileArg('nanodet-m-1.5x-416', 'nanodet-m-1.5x-416.onnx'),
+                              '--mean_values': ['103.53', '116.28', '123.675'],
+                              '--scale_values': ['57.375', '57.12', '58.395']}),
+        ]
+    ),
+    *combine_cases(
+        TestCase(options={'--architecture_type': 'nanodet-plus'}),
+        [
+            TestCase(options={'-m': ModelArg('nanodet-plus-m-1.5x-416')}),
+            TestCase(options={'-m': ModelFileArg('nanodet-plus-m-1.5x-416', 'nanodet-plus-m-1.5x-416.onnx'),
+                              '--mean_values': ['103.53', '116.28', '123.675'],
+                              '--scale_values': ['57.375', '57.12', '58.395']}),
+        ]
+    ),
 ],
 )),
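
(The `--mean_values`/`--scale_values` in the ONNX cases are the standard ImageNet normalization statistics in BGR channel order; they are passed explicitly here presumably because the raw ONNX model, unlike the converted IR, does not have normalization baked in.)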

models/public/device_support.md

Lines changed: 2 additions & 0 deletions
@@ -88,6 +88,8 @@
 | mtcnn-o | YES | YES | |
 | mtcnn-p | YES | YES | |
 | mtcnn-r | YES | YES | |
+| nanodet-m-1.5x-416 | YES | YES | |
+| nanodet-plus-m-1.5x-416 | YES | YES | |
 | netvlad-tf | YES | | |
 | nfnet-f0 | YES | YES | YES |
 | ocrnet-hrnet-w48-paddle | YES | YES | |
