Added support for ssd-vgg model (#2928)

Anna Grebneva · web-flow · commit 5d6a0ea9105d · 2021-11-25T22:44:16.000+03:00
diff --git a/tools/accuracy_checker/openvino/tools/accuracy_checker/adapters/README.md b/tools/accuracy_checker/openvino/tools/accuracy_checker/adapters/README.md
@@ -459,7 +459,7 @@ AccuracyChecker supports following set of adapters:
 * `noise_suppression` - converts output of audio denoising model to `NoiseSuppressionPrediction`.
   * `output_blob` - name of output layer with processed signal (Optional, if not provided, first found output from model will be used).
 * `kaldi_latgen_faster_mapped` - decodes output Kaldi\* automatic speech recognition model using lattice generation approach with transition model to `CharcterRecognitionPrediction`.
-  **Important note** This adapter requires [Kaldi\* installation](https://kaldi-asr.org/doc/install.html)(we recommend to use `67db30cc` commit)
+  **Important note** This adapter requires [Kaldi\* installation](https://kaldi-asr.org/doc/install.html) (we recommend to use `67db30cc` commit)
   and providing path to directory with compiled executable apps: `latgen-faster-mapped`, `lattice-scale`, `lattice-add-penalty`, `lattice-best-path`.
   Path directory can be provided using `--kaldi_bin_dir` commandline argument or `KALDI_BIN_DIR` environment variable.
   * `fst_file` - Weighted Finite-State Transducers (WFST) state graph file.
@@ -481,3 +481,9 @@ AccuracyChecker supports following set of adapters:
   * `output_name` - name of output node to convert.
 * `mask_to_binary_classification` - converts output of model represented as segmentation mask to `ArgMaxClassificationPrediction`. Class label calculated as comparision maximal probability in mask with given threshold.
   * `threshold` - probability threshold for label 1 (Optional, default 0.5).
+* `ssd_multilabel` - converts output of SSD-based model, where multiple labels can correspond to one box, to `DetectionPrediction` representation.
+  * `scores_out` - name of output layer with bounding boxes scores.
+  * `boxes_out` - name of output layer with bounding boxes coordinates.
+  * `confidence_threshold` - lower bound for valid boxes scores (optional, default 0.01).
+  * `nms_threshold` - overlap threshold for NMS (optional, default 0.45).
+  * `keep_top_k` - maximal number of boxes which should be kept (optional, default 200).
diff --git a/tools/accuracy_checker/openvino/tools/accuracy_checker/adapters/__init__.py b/tools/accuracy_checker/openvino/tools/accuracy_checker/adapters/__init__.py
@@ -67,7 +67,7 @@
     PersonVehicleDetectionRefinementAdapter
 )
 from .detection_head import HeadDetectionAdapter
-from .ssd import SSDAdapter, PyTorchSSDDecoder, FacePersonAdapter, SSDAdapterMxNet, SSDONNXAdapter
+from .ssd import SSDAdapter, PyTorchSSDDecoder, FacePersonAdapter, SSDAdapterMxNet, SSDONNXAdapter, SSDMultiLabelAdapter
 from .retinaface import RetinaFaceAdapter, RetinaFacePyTorchAdapter
 from .retinanet import RetinaNetAdapter, MultiOutRetinaNet, RetinaNetTF2
 from .yolo import (
@@ -167,6 +167,7 @@
     'SSDONNXAdapter',
     'PyTorchSSDDecoder',
     'FacePersonAdapter',
+    'SSDMultiLabelAdapter',
 
     'RetinaNetAdapter',
     'MultiOutRetinaNet',
diff --git a/tools/accuracy_checker/openvino/tools/accuracy_checker/adapters/ssd.py b/tools/accuracy_checker/openvino/tools/accuracy_checker/adapters/ssd.py
@@ -380,3 +380,72 @@ def find_layer(regex, output_name, all_outputs):
         self.bboxes_out = find_layer(bboxes_regex, 'bboxes', raw_outputs)
 
         self.outputs_verified = True
+
+
+class SSDMultiLabelAdapter(Adapter):
+    __provider__ = 'ssd_multilabel'  # adapter name, as listed in the adapters README
+
+    @classmethod
+    def parameters(cls):
+        params = super().parameters()
+        params.update({
+            'scores_out': StringField(description='scores output'),
+            'boxes_out': StringField(description='boxes output'),
+            'confidence_threshold': NumberField(optional=True, default=0.01, description="confidence threshold"),
+            'nms_threshold': NumberField(optional=True, default=0.45, description="NMS threshold"),
+            'keep_top_k': NumberField(optional=True, value_type=int, default=200, description="keep top K")
+        })
+        return params
+
+    def configure(self):
+        self.scores_out = self.get_value_from_config('scores_out')
+        self.boxes_out = self.get_value_from_config('boxes_out')
+        self.confidence_threshold = self.get_value_from_config('confidence_threshold')
+        self.iou_threshold = self.get_value_from_config('nms_threshold')  # config key differs from attribute name
+        self.keep_top_k = self.get_value_from_config('keep_top_k')
+        self.outputs_verified = False  # output names are resolved on the first process() call
+
+    def select_output_blob(self, outputs):
+        self.scores_out = self.check_output_name(self.scores_out, outputs)
+        self.boxes_out = self.check_output_name(self.boxes_out, outputs)
+        self.outputs_verified = True  # later process() calls skip this name check
+
+    def process(self, raw, identifiers, frame_meta):
+        result = []
+        raw_output = self._extract_predictions(raw, frame_meta)
+        if not self.outputs_verified:
+            self.select_output_blob(raw_output)
+        # Samples are decoded one by one; zip pairs each identifier with its scores and boxes.
+        for identifier, logits, boxes in zip(identifiers, raw_output[self.scores_out], raw_output[self.boxes_out]):
+            detections = {'labels': [], 'scores': [], 'x_mins': [], 'y_mins': [], 'x_maxs': [], 'y_maxs': []}
+            for class_index in range(1, logits.shape[-1]):  # column 0 is skipped (presumably background; confirm)
+                probs = logits[:, class_index]
+                mask = probs > self.confidence_threshold  # strict: a score equal to the threshold is dropped
+                probs = probs[mask]
+                if probs.size == 0:  # no box passed the threshold for this class
+                    continue
+                subset_boxes = boxes[mask, :]
+                # Transpose so the coordinate columns unpack into four separate arrays.
+                x_mins, y_mins, x_maxs, y_maxs = subset_boxes.T
+                # NMS runs per class; `keep` is then used to index the scores and coordinates of this class.
+                keep = NMS.nms(x_mins, y_mins, x_maxs, y_maxs, probs, self.iou_threshold, include_boundaries=False,
+                               keep_top_k=self.keep_top_k)
+
+                filtered_probs = probs[keep]
+                x_mins = x_mins[keep]
+                y_mins = y_mins[keep]
+                x_maxs = x_maxs[keep]
+                y_maxs = y_maxs[keep]
+                # Every box kept in this pass gets the current class index as its label.
+                labels = [class_index] * filtered_probs.size
+                detections['labels'].extend(labels)
+                detections['scores'].extend(filtered_probs)
+                detections['x_mins'].extend(x_mins)
+                detections['y_mins'].extend(y_mins)
+                detections['x_maxs'].extend(x_maxs)
+                detections['y_maxs'].extend(y_maxs)
+            # The lists accumulated over all classes form the single prediction for this identifier.
+            result.append(DetectionPrediction(identifier, detections['labels'], detections['scores'],
+                                              detections['x_mins'], detections['y_mins'], detections['x_maxs'],
+                                              detections['y_maxs']))
+        return result