Commit 63c0aee

AC: support anomaly segmentation (#2969)
* WIP
* AC: support anomaly segmentation
1 parent ec04bff commit 63c0aee

File tree: 16 files changed, +399 -92 lines changed

tools/accuracy_checker/openvino/tools/accuracy_checker/adapters/README.md

Lines changed: 3 additions & 1 deletion
```diff
@@ -33,7 +33,9 @@ AccuracyChecker supports following set of adapters:
 * `label_as_array` - produce ClassificationPrediction's label as array
 * `segmentation` - converting output of semantic segmentation model to `SegmentationPrediction` representation.
   * `make_argmax` - allows applying argmax operation to output values.
-* `segmentation_one_class` - converting output of semantic segmentation to `SegmentationPrediction` representation. It is suitable for situations when the model's output is the probability of each pixel belonging to the foreground class.
+* `segmentation_one_class` - converting output of semantic segmentation model to `SegmentationPrediction` representation. It is suitable for cases when the model's output is the probability of each pixel belonging to the foreground class.
+  * `threshold` - minimum probability threshold for valid class belonging.
+* `anomaly_segmentation` - converting output of anomaly segmentation model to `AnomalySegmentationPrediction` representation.
   * `threshold` - minimum probability threshold for valid class belonging.
 * `tiny_yolo_v1` - converting output of Tiny YOLO v1 model to `DetectionPrediction` representation.
 * `reid` - converting output of reidentification model to `ReIdentificationPrediction` representation.
```
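
For context, the new adapter's options can be sketched as the Python dict that `AnomalySegmentationAdapter`'s `parameters()` schema validates (a minimal sketch; the `type` key and the threshold value are assumptions for illustration, not taken from this commit):

```python
# Hypothetical adapter config for the new provider. 'anomaly_segmentation' is
# the adapter's __provider__ name; 'threshold' is inherited from
# segmentation_one_class, and 0.5 is just an example value.
adapter_config = {
    'type': 'anomaly_segmentation',
    'threshold': 0.5,
}
```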

tools/accuracy_checker/openvino/tools/accuracy_checker/adapters/__init__.py

Lines changed: 6 additions & 1 deletion
```diff
@@ -75,7 +75,11 @@
 )
 from .classification import ClassificationAdapter, MaskToBinaryClassification
 from .segmentation import (
-    SegmentationAdapter, BrainTumorSegmentationAdapter, DUCSegmentationAdapter, BackgroundMattingAdapter
+    SegmentationAdapter,
+    BrainTumorSegmentationAdapter,
+    DUCSegmentationAdapter,
+    BackgroundMattingAdapter,
+    AnomalySegmentationAdapter
 )
 from .facial_landmarks_98_detection import FacialLandmarksAdapter
 from .pose_estimation import HumanPoseAdapter, SingleHumanPoseAdapter, StackedHourGlassNetworkAdapter
@@ -177,6 +181,7 @@
     'DUCSegmentationAdapter',
     'SalientObjectDetection',
     'BackgroundMattingAdapter',
+    'AnomalySegmentationAdapter',
 
     'ReidAdapter',
```
tools/accuracy_checker/openvino/tools/accuracy_checker/adapters/classification.py

Lines changed: 25 additions & 19 deletions
```diff
@@ -19,6 +19,7 @@
 from ..adapters import Adapter
 from ..config import BoolField, StringField, NumberField
 from ..representation import ClassificationPrediction, ArgMaxClassificationPrediction
+from ..utils import softmax
 
 
 class ClassificationAdapter(Adapter):
@@ -47,7 +48,12 @@ def parameters(cls):
             'label_as_array': BoolField(
                 optional=True, default=False, description="produce ClassificationPrediction's label as array"
             ),
-            'classification_output': StringField(optional=True, description='target output layer name')
+            'classification_output': StringField(optional=True, description='target output layer name'),
+            'multi_label_threshold': NumberField(
+                optional=True, value_type=float,
+                description='threshold for treating classification as multi label problem'),
+            'do_softmax': BoolField(
+                optional=True, description='apply softmax on probabilities in logits format', default=False)
         })
 
         return parameters
@@ -59,6 +65,8 @@ def configure(self):
         self.fixed_output = self.get_value_from_config('fixed_output')
         self.fixed_output_index = int(self.get_value_from_config('fixed_output_index'))
         self.label_as_array = self.get_value_from_config('label_as_array')
+        self.do_softmax = self.get_value_from_config('do_softmax')
+        self.multilabel_thresh = self.get_value_from_config('multi_label_threshold')
         self.output_verified = False
 
     def select_output_blob(self, outputs):
@@ -83,7 +91,6 @@ def process(self, raw, identifiers, frame_meta):
             self.select_output_blob(raw)
         multi_infer = frame_meta[-1].get('multi_infer', False) if frame_meta else False
         raw_prediction = self._extract_predictions(raw, frame_meta)
-        self.select_output_blob(raw_prediction)
         prediction = raw_prediction[self.output_blob]
         if multi_infer:
             prediction = np.mean(prediction, axis=0)
@@ -93,28 +100,27 @@ def process(self, raw, identifiers, frame_meta):
 
         result = []
         if self.block:
-            if self.argmax_output:
-                single_prediction = ArgMaxClassificationPrediction(identifiers[0], prediction)
-            elif self.fixed_output:
-                single_prediction = ArgMaxClassificationPrediction(identifiers[0],
-                                                                   prediction[:, self.fixed_output_index])
-            else:
-                single_prediction = ClassificationPrediction(identifiers[0], prediction, self.label_as_array)
-
-            result.append(single_prediction)
+            result.append(self.prepare_representation(identifiers[0], prediction))
         else:
             for identifier, output in zip(identifiers, prediction):
-                if self.argmax_output:
-                    single_prediction = ArgMaxClassificationPrediction(identifier, [output[0], ])
-                elif self.fixed_output:
-                    single_prediction = ArgMaxClassificationPrediction(identifiers[0],
-                                                                       output[self.fixed_output_index])
-                else:
-                    single_prediction = ClassificationPrediction(identifier, output)
-                result.append(single_prediction)
+                result.append(self.prepare_representation(identifier, output))
 
         return result
 
+    def prepare_representation(self, identifier, prediction):
+        if self.argmax_output:
+            single_prediction = ArgMaxClassificationPrediction(identifier, prediction)
+        elif self.fixed_output:
+            single_prediction = ArgMaxClassificationPrediction(identifier,
+                                                               prediction[:, self.fixed_output_index])
+        else:
+            if self.do_softmax:
+                prediction = softmax(prediction)
+            single_prediction = ClassificationPrediction(
+                identifier, prediction, self.label_as_array,
+                multilabel_threshold=self.multilabel_thresh)
+        return single_prediction
+
     @staticmethod
     def _extract_predictions(outputs_list, meta):
         is_multi_infer = meta[-1].get('multi_infer', False) if meta else False
```
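
The hunk above imports `softmax` from `..utils` and applies it when `do_softmax` is set. For reference, a minimal numerically stable sketch of such a helper (an illustration of the usual approach, not necessarily the exact code in `utils`):

```python
import numpy as np

def softmax(x, axis=-1):
    """Map logits to probabilities; subtracting the max avoids overflow in exp."""
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp = np.exp(shifted)
    return exp / np.sum(exp, axis=axis, keepdims=True)

print(softmax(np.array([2.0, 1.0, 0.1])))  # ~[0.659 0.242 0.099], sums to 1
```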

tools/accuracy_checker/openvino/tools/accuracy_checker/adapters/segmentation.py

Lines changed: 37 additions & 14 deletions
```diff
@@ -18,7 +18,10 @@
 import cv2
 import numpy as np
 from ..adapters import Adapter
-from ..representation import SegmentationPrediction, BrainTumorSegmentationPrediction, BackgroundMattingPrediction
+from ..representation import (
+    SegmentationPrediction, BrainTumorSegmentationPrediction, BackgroundMattingPrediction,
+    AnomalySegmentationPrediction
+)
 from ..config import ConfigError, ConfigValidator, BoolField, ListField, NumberField, StringField
 from ..utils import contains_any
 
@@ -55,22 +58,26 @@ def process(self, raw, identifiers, frame_meta):
         if segm_out.shape[0] != len(identifiers) and len(identifiers) == 1:
             segm_out = np.expand_dims(segm_out, 0)
         for identifier, output, meta in zip(identifiers, segm_out, frame_meta):
-            input_shape = next(iter(meta['input_shape'].values()))
-            is_chw = input_shape[1] <= 4
-            if len(output.shape) == 2 and len(input_shape) == 4:
-                (in_h, in_w) = input_shape[2:] if is_chw else input_shape[1:3]
-                if output.shape[0] == in_h * in_w:
-                    output = np.resize(output, (in_h, in_w, output.shape[-1]))
-                    is_chw = False
-            if self.make_argmax:
-                argmax_axis = 0 if is_chw else -1
-                output = np.argmax(output, axis=argmax_axis)
-            if not is_chw and not self.make_argmax and len(output.shape) == 3:
-                output = np.transpose(output, (2, 0, 1))
+            output = self.prepare_seg_map(meta, output)
             result.append(SegmentationPrediction(identifier, output))
 
         return result
 
+    def prepare_seg_map(self, meta, output):
+        input_shape = next(iter(meta['input_shape'].values()))
+        is_chw = input_shape[1] <= 4
+        if len(output.shape) == 2 and len(input_shape) == 4:
+            (in_h, in_w) = input_shape[2:] if is_chw else input_shape[1:3]
+            if output.shape[0] == in_h * in_w:
+                output = np.resize(output, (in_h, in_w, output.shape[-1]))
+                is_chw = False
+        if self.make_argmax:
+            argmax_axis = 0 if is_chw else -1
+            output = np.argmax(output, axis=argmax_axis)
+        if not is_chw and not self.make_argmax and len(output.shape) == 3:
+            output = np.transpose(output, (2, 0, 1))
+        return output
+
     def _extract_predictions(self, outputs_list, meta):
         if 'tiles_shape' not in (meta[-1] or {}):
             return outputs_list[0] if not isinstance(outputs_list, dict) else outputs_list
@@ -116,12 +123,28 @@ def process(self, raw, identifiers, frame_meta):
         raw_outputs = self._extract_predictions(raw, frame_meta)
         self.select_output_blob(raw_outputs)
         for identifier, output in zip(identifiers, raw_outputs[self.output_blob]):
-            output = output > self.threshold
+            output = output >= self.threshold
             result.append(SegmentationPrediction(identifier, output.astype(np.uint8)))
 
         return result
 
 
+class AnomalySegmentationAdapter(SegmentationOneClassAdapter):
+    __provider__ = 'anomaly_segmentation'
+    prediction_types = (AnomalySegmentationPrediction, )
+
+    def process(self, raw, identifiers, frame_meta):
+        result = []
+        frame_meta = frame_meta or [] * len(identifiers)
+        raw_outputs = self._extract_predictions(raw, frame_meta)
+        self.select_output_blob(raw_outputs)
+        for identifier, output in zip(identifiers, raw_outputs[self.output_blob]):
+            output = output > self.threshold
+            result.append(AnomalySegmentationPrediction(identifier, output.astype(np.uint8)))
+
+        return result
+
+
 class BrainTumorSegmentationAdapter(Adapter):
     __provider__ = 'brain_tumor_segmentation'
     prediction_types = (BrainTumorSegmentationPrediction,)
```
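
Two thresholding details in this file are easy to miss: `segmentation_one_class` now uses an inclusive `>=` comparison, while the new `anomaly_segmentation` adapter keeps the strict `>`. A toy numpy illustration of the resulting masks (values are made up):

```python
import numpy as np

probs = np.array([[0.2, 0.5],
                  [0.7, 0.5]])  # toy per-pixel probabilities
threshold = 0.5

one_class_mask = (probs >= threshold).astype(np.uint8)  # 0.5 counts as foreground
anomaly_mask = (probs > threshold).astype(np.uint8)     # 0.5 stays background
print(one_class_mask)  # [[0 1]
                       #  [1 1]]
print(anomaly_mask)    # [[0 0]
                       #  [1 0]]
```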

tools/accuracy_checker/openvino/tools/accuracy_checker/annotation_converters/README.md

Lines changed: 4 additions & 0 deletions
```diff
@@ -798,6 +798,10 @@ The main difference between this converter and `super_resolution` in data organization
 * `speaker_reidentification` - converts annotation for speaker/sound reidentification dataset to `ReidentificationClassificationAnnotation`.
   * `annotation_file` - text file containing reidentification pairs in the following format: `[is_positive] [audio_1] [audio_2]`, where `is_positive` is an indicator that the pair is positive (1 if the speaker is the same, 0 if the speakers are different), `audio_1` and `audio_2` are paths to the first and second audio files respectively.
   * `max_pairs` - limit on the number of selected pairs for each image (Optional, by default all pairs found in the annotation are used).
+* `mvtec` - converts MVTec dataset to `AnomalySegmentationAnnotation` or `ClassificationAnnotation`.
+  * `data_dir` - directory with subset images.
+  * `classification_only` - converts dataset to `ClassificationAnnotation` without saving pixel level information (Optional, default `False`).
+
 ## <a name="customizing-dataset-meta"></a>Customizing Dataset Meta
 There are situations when we need to customize some default dataset parameters (e.g. replace the original dataset label map with our own).
 You are able to overload parameters such as `label_map`, `segmentation_colors`, `background_label` using the `dataset_meta_file` argument.
```
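
A rough sketch of how the new converter might be configured, shown as a Python dict (field names come from the entry above; the dataset path is a placeholder):

```python
# Hypothetical annotation conversion settings for the mvtec converter.
conversion_config = {
    'converter': 'mvtec',
    'data_dir': '/datasets/mvtec/bottle',  # placeholder path to one MVTec category
    'classification_only': False,          # keep pixel-level mask information
}
```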

tools/accuracy_checker/openvino/tools/accuracy_checker/annotation_converters/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@
125125
from .common_text_recognition import CommonTextRecognition
126126
from .coco_facial_landmarks import COCOFacialLandmarksRecognitionConverter
127127
from .speaker_identification import SpeakerReIdentificationDatasetConverter
128+
from .mvtec import MVTecDatasetConverter
128129

129130
__all__ = [
130131
'BaseFormatConverter',
@@ -250,4 +251,5 @@
250251
'CommonTextRecognition',
251252
'COCOFacialLandmarksRecognitionConverter',
252253
'SpeakerReIdentificationDatasetConverter',
254+
'MVTecDatasetConverter',
253255
]
tools/accuracy_checker/openvino/tools/accuracy_checker/annotation_converters/mvtec.py

Lines changed: 80 additions & 0 deletions

```diff
@@ -0,0 +1,80 @@
+"""
+Copyright (c) 2018-2021 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import cv2
+import numpy as np
+from .format_converter import BaseFormatConverter, ConverterReturn
+from ..representation import ClassificationAnnotation, AnomalySegmentationAnnotation
+from ..config import PathField, BoolField
+from ..utils import get_path, check_file_existence
+
+
+class MVTecDatasetConverter(BaseFormatConverter):
+    __provider__ = 'mvtec'
+
+    @classmethod
+    def parameters(cls):
+        params = super().parameters()
+        params.update({
+            'data_dir': PathField(is_directory=True, optional=False, description='Dataset root dir'),
+            'classification_only': BoolField(optional=True, default=False),
+
+        })
+        return params
+
+    def configure(self):
+        self.data_dir = self.get_value_from_config('data_dir')
+        self.test_images_dir = get_path(self.data_dir / 'test', is_directory=True)
+        self.classification_only = self.get_value_from_config('classification_only')
+        if not self.classification_only:
+            self.reference_mask_dir = self.data_dir / 'ground_truth'
+
+    def convert(self, check_content=False, progress_callback=None, progress_interval=100, **kwargs):
+        test_images = list(self.test_images_dir.rglob("**/*.png"))
+        num_iterations = len(test_images)
+        annotations = []
+        errors = [] if check_content else None
+        for idx, image in enumerate(test_images):
+            label = image.parent.name
+            label_id = 0 if label == 'good' else 1
+            identifier = str(image.relative_to(self.test_images_dir))
+            if self.classification_only:
+                annotation = ClassificationAnnotation(identifier, label_id)
+                annotations.append(annotation)
+            else:
+                mask = None
+                mask_path = None
+                if not label_id:
+                    img = cv2.imread(str(image))
+                    h, w = img.shape[:2]
+                    mask = np.zeros((h, w), dtype=np.uint8)
+                else:
+                    mask_path = str(
+                        image.with_name(image.stem + '_mask' + image.suffix).relative_to(self.test_images_dir))
+                annotation = AnomalySegmentationAnnotation(identifier, mask_path, label_id)
+                if mask_path is None:
+                    annotation.mask = mask
+                if check_content and mask_path:
+                    if not check_file_existence(self.reference_mask_dir / mask_path):
+                        errors.append('{}: does not exist'.format(self.reference_mask_dir / mask_path))
+                annotations.append(annotation)
+            if progress_callback and idx % progress_interval == 0:
+                progress_callback(idx * 100 / num_iterations)
+        return ConverterReturn(annotations, self.get_meta(), errors)
+
+    @staticmethod
+    def get_meta():
+        return {'label_map': {0: 'good', 1: 'defect'}}
```
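
The converter assumes the standard MVTec layout: test images under `test/<defect_type>/` (with `good` meaning defect-free) and reference masks under `ground_truth/<defect_type>/<stem>_mask.png`. The mask-path derivation used in `convert()` can be checked in isolation (the `broken_large` defect type below is just an example):

```python
from pathlib import Path

# Assumed layout, relative to data_dir (category and defect names are examples):
#   test/good/000.png                      -> label 0, all-zeros mask is synthesized
#   test/broken_large/000.png              -> label 1
#   ground_truth/broken_large/000_mask.png -> reference mask
image = Path('broken_large/000.png')  # identifier relative to <data_dir>/test
mask_path = image.with_name(image.stem + '_mask' + image.suffix)
print(mask_path)  # broken_large/000_mask.png, later resolved under ground_truth/
```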

tools/accuracy_checker/openvino/tools/accuracy_checker/metrics/README.md

Lines changed: 4 additions & 2 deletions
```diff
@@ -42,11 +42,13 @@ Supported representations: `ClassificationAnnotation`, `TextClassificationAnnotation`
 * `character_recognition_accuracy` - accuracy metric for character recognition task. Metric is calculated as a percentage. Direction of metric's growth is higher-better. Supported representations: `CharacterRecognitionAnnotation`, `CharacterRecognitionPrediction`.
   * `remove_spaces` - allow removing spaces from reference and predicted strings (Optional, default - `False`).
 * `label_level_recognition_accuracy` - [label level recognition accuracy](https://dl.acm.org/doi/abs/10.1145/1143844.1143891) metric for text line character recognition task using [editdistance](https://pypi.org/project/editdistance/). Metric is calculated as a percentage. Direction of metric's growth is higher-better. Supported representations: `CharacterRecognitionAnnotation`, `CharacterRecognitionPrediction`.
-* `classification_f1-score` - [F1 score](https://en.wikipedia.org/wiki/F1_score) metric for classification task. Metric is calculated as a percentage. Direction of metric's growth is higher-better. Supported representations: `ClassificationAnnotation`, `TextClassificationAnnotation`, `ClassificationPrediction`.
+* `classification_f1-score` - [F1 score](https://en.wikipedia.org/wiki/F1_score) metric for classification task. Metric is calculated as a percentage. Direction of metric's growth is higher-better. Supported representations: `ClassificationAnnotation`, `TextClassificationAnnotation`, `ClassificationPrediction`, `ArgMaxClassificationPrediction`, `AnomalySegmentationAnnotation`, `AnomalySegmentationPrediction`.
   * `label_map` - the field in annotation metadata which contains the dataset label map (Optional, should be provided if different from default).
   * `pos_label` - class to report during metric calculation (Optional). If the argument is not specified, the metric for each class and the mean will be reported.
+  * `pixel_level` - evaluate metric on pixel level for anomaly segmentation (Optional, default `False`).
 * `metthews_correlation_coef` - [Matthews correlation coefficient (MCC)](https://en.wikipedia.org/wiki/Matthews_correlation_coefficient) for binary classification. Metric is calculated as a percentage. Direction of metric's growth is higher-better. Supported representations: `ClassificationAnnotation`, `TextClassificationAnnotation`, `ClassificationPrediction`.
-* `roc_auc_score` - [ROC AUC score](https://en.wikipedia.org/wiki/Receiver_operating_characteristic) for binary classification. Metric is calculated as a percentage. Direction of metric's growth is higher-better. Supported representations: `ClassificationAnnotation`, `TextClassificationAnnotation`, `ClassificationPrediction`, `ArgMaxClassificationPrediction`.
+* `roc_auc_score` - [ROC AUC score](https://en.wikipedia.org/wiki/Receiver_operating_characteristic) for binary classification. Metric is calculated as a percentage. Direction of metric's growth is higher-better. Supported representations: `ClassificationAnnotation`, `TextClassificationAnnotation`, `ClassificationPrediction`, `ArgMaxClassificationPrediction`, `AnomalySegmentationAnnotation`, `AnomalySegmentationPrediction`.
+  * `pixel_level` - evaluate metric on pixel level for anomaly segmentation (Optional, default `False`).
 * `acer_score` - metric for classification tasks. Can be obtained from the following formula: `ACER = (APCER + BPCER)/2 = ((fp / (tn + fp)) + (fn / (fn + tp)))/2`. For more details about metrics see section 9.3: <https://arxiv.org/abs/2007.12342>. Metric is calculated as a percentage. Direction of metric's growth is higher-worse. Supported representations: `ClassificationAnnotation`, `TextClassificationAnnotation`, `ClassificationPrediction`.
 * `clip_accuracy` - classification video-level accuracy metric. Metric is calculated as a percentage. Direction of metric's growth is higher-better. Supported representations: `ClassificationAnnotation`, `ClassificationPrediction`.
 * `map` - mean average precision. Metric is calculated as a percentage. Direction of metric's growth is higher-better. Supported representations: `DetectionAnnotation`, `DetectionPrediction`.
```
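
The difference between the default image-level evaluation and the new `pixel_level` option can be shown with a small numpy sketch (a simplified illustration of the idea, not the checker's implementation):

```python
import numpy as np

def f1_binary(y_true, y_pred):
    """Plain binary F1: 2*TP / (2*TP + FP + FN)."""
    tp = np.sum((y_true == 1) & (y_pred == 1))
    fp = np.sum((y_true == 0) & (y_pred == 1))
    fn = np.sum((y_true == 1) & (y_pred == 0))
    return 2 * tp / (2 * tp + fp + fn)

gt = [np.array([[0, 1], [0, 0]]), np.zeros((2, 2), dtype=int)]
pred = [np.array([[0, 1], [1, 0]]), np.zeros((2, 2), dtype=int)]

# Image level: collapse each mask to one anomalous/normal label per image.
image_f1 = f1_binary(np.array([m.any() for m in gt], dtype=int),
                     np.array([m.any() for m in pred], dtype=int))
# Pixel level: score every pixel of every mask.
pixel_f1 = f1_binary(np.concatenate([m.ravel() for m in gt]),
                     np.concatenate([m.ravel() for m in pred]))
print(image_f1, pixel_f1)  # 1.0 vs ~0.667: one stray pixel only hurts pixel level
```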
