Commit 63c0aee

AC: support anomaly segmentation (#2969)
* WIP
* AC: support anomaly segmentation
1 parent ec04bff commit 63c0aee

File tree: 16 files changed, +399 -92 lines changed

tools/accuracy_checker/openvino/tools/accuracy_checker/adapters/README.md

Lines changed: 3 additions & 1 deletion
```diff
@@ -33,7 +33,9 @@ AccuracyChecker supports following set of adapters:
 * `label_as_array` - produce ClassificationPrediction's label as array
 * `segmentation` - converting output of semantic segmentation model to `SegmentationPrediction` representation.
   * `make_argmax` - allows applying argmax operation to output values.
-* `segmentation_one_class` - converting output of semantic segmentation to `SegmentationPrediction` representation. It is suitable for situations when the model's output is the probability of each pixel belonging to the foreground class.
+* `segmentation_one_class` - converting output of semantic segmentation model to `SegmentationPrediction` representation. It is suitable for cases when the model's output is the probability of each pixel belonging to the foreground class.
+  * `threshold` - minimum probability threshold for valid class belonging.
+* `anomaly_segmentation` - converting output of anomaly segmentation model to `AnomalySegmentationPrediction` representation.
   * `threshold` - minimum probability threshold for valid class belonging.
 * `tiny_yolo_v1` - converting output of Tiny YOLO v1 model to `DetectionPrediction` representation.
 * `reid` - converting output of reidentification model to `ReIdentificationPrediction` representation.
```
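
For context, the new adapter's options can be sketched as the Python dict that `AnomalySegmentationAdapter`'s `parameters()` schema validates (a minimal sketch; the `type` key and the threshold value are assumptions for illustration, not taken from this commit):

```python
# Hypothetical adapter config for the new provider. 'anomaly_segmentation' is
# the adapter's __provider__ name; 'threshold' is inherited from
# segmentation_one_class, and 0.5 is just an example value.
adapter_config = {
    'type': 'anomaly_segmentation',
    'threshold': 0.5,
}
```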

tools/accuracy_checker/openvino/tools/accuracy_checker/adapters/__init__.py

Lines changed: 6 additions & 1 deletion
```diff
@@ -75,7 +75,11 @@
 )
 from .classification import ClassificationAdapter, MaskToBinaryClassification
 from .segmentation import (
-    SegmentationAdapter, BrainTumorSegmentationAdapter, DUCSegmentationAdapter, BackgroundMattingAdapter
+    SegmentationAdapter,
+    BrainTumorSegmentationAdapter,
+    DUCSegmentationAdapter,
+    BackgroundMattingAdapter,
+    AnomalySegmentationAdapter
 )
 from .facial_landmarks_98_detection import FacialLandmarksAdapter
 from .pose_estimation import HumanPoseAdapter, SingleHumanPoseAdapter, StackedHourGlassNetworkAdapter
@@ -177,6 +181,7 @@
     'DUCSegmentationAdapter',
     'SalientObjectDetection',
     'BackgroundMattingAdapter',
+    'AnomalySegmentationAdapter',
 
     'ReidAdapter',
```
tools/accuracy_checker/openvino/tools/accuracy_checker/adapters/classification.py

Lines changed: 25 additions & 19 deletions
```diff
@@ -19,6 +19,7 @@
 from ..adapters import Adapter
 from ..config import BoolField, StringField, NumberField
 from ..representation import ClassificationPrediction, ArgMaxClassificationPrediction
+from ..utils import softmax
 
 
 class ClassificationAdapter(Adapter):
@@ -47,7 +48,12 @@ def parameters(cls):
             'label_as_array': BoolField(
                 optional=True, default=False, description="produce ClassificationPrediction's label as array"
             ),
-            'classification_output': StringField(optional=True, description='target output layer name')
+            'classification_output': StringField(optional=True, description='target output layer name'),
+            'multi_label_threshold': NumberField(
+                optional=True, value_type=float,
+                description='threshold for treating classification as multi label problem'),
+            'do_softmax': BoolField(
+                optional=True, description='apply softmax on probabilities in logits format', default=False)
         })
 
         return parameters
@@ -59,6 +65,8 @@ def configure(self):
         self.fixed_output = self.get_value_from_config('fixed_output')
         self.fixed_output_index = int(self.get_value_from_config('fixed_output_index'))
         self.label_as_array = self.get_value_from_config('label_as_array')
+        self.do_softmax = self.get_value_from_config('do_softmax')
+        self.multilabel_thresh = self.get_value_from_config('multi_label_threshold')
         self.output_verified = False
 
     def select_output_blob(self, outputs):
@@ -83,7 +91,6 @@ def process(self, raw, identifiers, frame_meta):
             self.select_output_blob(raw)
         multi_infer = frame_meta[-1].get('multi_infer', False) if frame_meta else False
         raw_prediction = self._extract_predictions(raw, frame_meta)
-        self.select_output_blob(raw_prediction)
         prediction = raw_prediction[self.output_blob]
         if multi_infer:
             prediction = np.mean(prediction, axis=0)
@@ -93,28 +100,27 @@ def process(self, raw, identifiers, frame_meta):
 
         result = []
         if self.block:
-            if self.argmax_output:
-                single_prediction = ArgMaxClassificationPrediction(identifiers[0], prediction)
-            elif self.fixed_output:
-                single_prediction = ArgMaxClassificationPrediction(identifiers[0],
-                                                                   prediction[:, self.fixed_output_index])
-            else:
-                single_prediction = ClassificationPrediction(identifiers[0], prediction, self.label_as_array)
-
-            result.append(single_prediction)
+            result.append(self.prepare_representation(identifiers[0], prediction))
         else:
             for identifier, output in zip(identifiers, prediction):
-                if self.argmax_output:
-                    single_prediction = ArgMaxClassificationPrediction(identifier, [output[0], ])
-                elif self.fixed_output:
-                    single_prediction = ArgMaxClassificationPrediction(identifiers[0],
-                                                                       output[self.fixed_output_index])
-                else:
-                    single_prediction = ClassificationPrediction(identifier, output)
-                result.append(single_prediction)
+                result.append(self.prepare_representation(identifier, output))
 
         return result
 
+    def prepare_representation(self, identifier, prediction):
+        if self.argmax_output:
+            single_prediction = ArgMaxClassificationPrediction(identifier, prediction)
+        elif self.fixed_output:
+            single_prediction = ArgMaxClassificationPrediction(identifier,
+                                                               prediction[:, self.fixed_output_index])
+        else:
+            if self.do_softmax:
+                prediction = softmax(prediction)
+            single_prediction = ClassificationPrediction(
+                identifier, prediction, self.label_as_array,
+                multilabel_threshold=self.multilabel_thresh)
+        return single_prediction
+
     @staticmethod
     def _extract_predictions(outputs_list, meta):
         is_multi_infer = meta[-1].get('multi_infer', False) if meta else False
```
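
The hunk above imports `softmax` from `..utils` and applies it when `do_softmax` is set. For reference, a minimal numerically stable sketch of such a helper (an illustration of the usual approach, not necessarily the exact code in `utils`):

```python
import numpy as np

def softmax(x, axis=-1):
    """Map logits to probabilities; subtracting the max avoids overflow in exp."""
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp = np.exp(shifted)
    return exp / np.sum(exp, axis=axis, keepdims=True)

print(softmax(np.array([2.0, 1.0, 0.1])))  # ~[0.659 0.242 0.099], sums to 1
```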

tools/accuracy_checker/openvino/tools/accuracy_checker/adapters/segmentation.py

Lines changed: 37 additions & 14 deletions
```diff
@@ -18,7 +18,10 @@
 import cv2
 import numpy as np
 from ..adapters import Adapter
-from ..representation import SegmentationPrediction, BrainTumorSegmentationPrediction, BackgroundMattingPrediction
+from ..representation import (
+    SegmentationPrediction, BrainTumorSegmentationPrediction, BackgroundMattingPrediction,
+    AnomalySegmentationPrediction
+)
 from ..config import ConfigError, ConfigValidator, BoolField, ListField, NumberField, StringField
 from ..utils import contains_any
 
@@ -55,22 +58,26 @@ def process(self, raw, identifiers, frame_meta):
         if segm_out.shape[0] != len(identifiers) and len(identifiers) == 1:
             segm_out = np.expand_dims(segm_out, 0)
         for identifier, output, meta in zip(identifiers, segm_out, frame_meta):
-            input_shape = next(iter(meta['input_shape'].values()))
-            is_chw = input_shape[1] <= 4
-            if len(output.shape) == 2 and len(input_shape) == 4:
-                (in_h, in_w) = input_shape[2:] if is_chw else input_shape[1:3]
-                if output.shape[0] == in_h * in_w:
-                    output = np.resize(output, (in_h, in_w, output.shape[-1]))
-                    is_chw = False
-            if self.make_argmax:
-                argmax_axis = 0 if is_chw else -1
-                output = np.argmax(output, axis=argmax_axis)
-            if not is_chw and not self.make_argmax and len(output.shape) == 3:
-                output = np.transpose(output, (2, 0, 1))
+            output = self.prepare_seg_map(meta, output)
             result.append(SegmentationPrediction(identifier, output))
 
         return result
 
+    def prepare_seg_map(self, meta, output):
+        input_shape = next(iter(meta['input_shape'].values()))
+        is_chw = input_shape[1] <= 4
+        if len(output.shape) == 2 and len(input_shape) == 4:
+            (in_h, in_w) = input_shape[2:] if is_chw else input_shape[1:3]
+            if output.shape[0] == in_h * in_w:
+                output = np.resize(output, (in_h, in_w, output.shape[-1]))
+                is_chw = False
+        if self.make_argmax:
+            argmax_axis = 0 if is_chw else -1
+            output = np.argmax(output, axis=argmax_axis)
+        if not is_chw and not self.make_argmax and len(output.shape) == 3:
+            output = np.transpose(output, (2, 0, 1))
+        return output
+
     def _extract_predictions(self, outputs_list, meta):
         if 'tiles_shape' not in (meta[-1] or {}):
             return outputs_list[0] if not isinstance(outputs_list, dict) else outputs_list
@@ -116,12 +123,28 @@ def process(self, raw, identifiers, frame_meta):
         raw_outputs = self._extract_predictions(raw, frame_meta)
         self.select_output_blob(raw_outputs)
         for identifier, output in zip(identifiers, raw_outputs[self.output_blob]):
-            output = output > self.threshold
+            output = output >= self.threshold
             result.append(SegmentationPrediction(identifier, output.astype(np.uint8)))
 
         return result
 
 
+class AnomalySegmentationAdapter(SegmentationOneClassAdapter):
+    __provider__ = 'anomaly_segmentation'
+    prediction_types = (AnomalySegmentationPrediction, )
+
+    def process(self, raw, identifiers, frame_meta):
+        result = []
+        frame_meta = frame_meta or [] * len(identifiers)
+        raw_outputs = self._extract_predictions(raw, frame_meta)
+        self.select_output_blob(raw_outputs)
+        for identifier, output in zip(identifiers, raw_outputs[self.output_blob]):
+            output = output > self.threshold
+            result.append(AnomalySegmentationPrediction(identifier, output.astype(np.uint8)))
+
+        return result
+
+
 class BrainTumorSegmentationAdapter(Adapter):
     __provider__ = 'brain_tumor_segmentation'
     prediction_types = (BrainTumorSegmentationPrediction,)
```
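
Two thresholding details in this file are easy to miss: `segmentation_one_class` now uses an inclusive `>=` comparison, while the new `anomaly_segmentation` adapter keeps the strict `>`. A toy numpy illustration of the resulting masks (values are made up):

```python
import numpy as np

probs = np.array([[0.2, 0.5],
                  [0.7, 0.5]])  # toy per-pixel probabilities
threshold = 0.5

one_class_mask = (probs >= threshold).astype(np.uint8)  # 0.5 counts as foreground
anomaly_mask = (probs > threshold).astype(np.uint8)     # 0.5 stays background
print(one_class_mask)  # [[0 1]
                       #  [1 1]]
print(anomaly_mask)    # [[0 0]
                       #  [1 0]]
```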

tools/accuracy_checker/openvino/tools/accuracy_checker/annotation_converters/README.md

Lines changed: 4 additions & 0 deletions
```diff
@@ -798,6 +798,10 @@ The main difference between this converter and `super_resolution` in data organization
 * `speaker_reidentification` - converts annotation for speaker/sound reidentification dataset to `ReidentificationClassificationAnnotation`.
   * `annotation_file` - text file containing reidentification pairs in the following format: `[is_positive] [audio_1] [audio_2]`, where `is_positive` is an indicator that the pair is positive (1 if the speaker is the same, 0 if the speakers are different), `audio_1` and `audio_2` are paths to the first and second audio files respectively.
   * `max_pairs` - limit on the number of selected pairs for each image (Optional, by default all pairs found in the annotation are used).
+* `mvtec` - converts MVTec dataset to `AnomalySegmentationAnnotation` or `ClassificationAnnotation`.
+  * `data_dir` - directory with subset images.
+  * `classification_only` - converts dataset to `ClassificationAnnotation` without saving pixel level information (Optional, default `False`).
+
 ## <a name="customizing-dataset-meta"></a>Customizing Dataset Meta
 There are situations when we need to customize some default dataset parameters (e.g. replace the original dataset label map with our own).
 You are able to overload parameters such as `label_map`, `segmentation_colors`, `background_label` using the `dataset_meta_file` argument.
```
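
A rough sketch of how the new converter might be configured, shown as a Python dict (field names come from the entry above; the dataset path is a placeholder):

```python
# Hypothetical annotation conversion settings for the mvtec converter.
conversion_config = {
    'converter': 'mvtec',
    'data_dir': '/datasets/mvtec/bottle',  # placeholder path to one MVTec category
    'classification_only': False,          # keep pixel-level mask information
}
```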

tools/accuracy_checker/openvino/tools/accuracy_checker/annotation_converters/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@
125125
from .common_text_recognition import CommonTextRecognition
126126
from .coco_facial_landmarks import COCOFacialLandmarksRecognitionConverter
127127
from .speaker_identification import SpeakerReIdentificationDatasetConverter
128+
from .mvtec import MVTecDatasetConverter
128129

129130
__all__ = [
130131
'BaseFormatConverter',
@@ -250,4 +251,5 @@
250251
'CommonTextRecognition',
251252
'COCOFacialLandmarksRecognitionConverter',
252253
'SpeakerReIdentificationDatasetConverter',
254+
'MVTecDatasetConverter',
253255
]
tools/accuracy_checker/openvino/tools/accuracy_checker/annotation_converters/mvtec.py

Lines changed: 80 additions & 0 deletions

```diff
@@ -0,0 +1,80 @@
+"""
+Copyright (c) 2018-2021 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import cv2
+import numpy as np
+from .format_converter import BaseFormatConverter, ConverterReturn
+from ..representation import ClassificationAnnotation, AnomalySegmentationAnnotation
+from ..config import PathField, BoolField
+from ..utils import get_path, check_file_existence
+
+
+class MVTecDatasetConverter(BaseFormatConverter):
+    __provider__ = 'mvtec'
+
+    @classmethod
+    def parameters(cls):
+        params = super().parameters()
+        params.update({
+            'data_dir': PathField(is_directory=True, optional=False, description='Dataset root dir'),
+            'classification_only': BoolField(optional=True, default=False),
+
+        })
+        return params
+
+    def configure(self):
+        self.data_dir = self.get_value_from_config('data_dir')
+        self.test_images_dir = get_path(self.data_dir / 'test', is_directory=True)
+        self.classification_only = self.get_value_from_config('classification_only')
+        if not self.classification_only:
+            self.reference_mask_dir = self.data_dir / 'ground_truth'
+
+    def convert(self, check_content=False, progress_callback=None, progress_interval=100, **kwargs):
+        test_images = list(self.test_images_dir.rglob("**/*.png"))
+        num_iterations = len(test_images)
+        annotations = []
+        errors = [] if check_content else None
+        for idx, image in enumerate(test_images):
+            label = image.parent.name
+            label_id = 0 if label == 'good' else 1
+            identifier = str(image.relative_to(self.test_images_dir))
+            if self.classification_only:
+                annotation = ClassificationAnnotation(identifier, label_id)
+                annotations.append(annotation)
+            else:
+                mask = None
+                mask_path = None
+                if not label_id:
+                    img = cv2.imread(str(image))
+                    h, w = img.shape[:2]
+                    mask = np.zeros((h, w), dtype=np.uint8)
+                else:
+                    mask_path = str(
+                        image.with_name(image.stem + '_mask' + image.suffix).relative_to(self.test_images_dir))
+                annotation = AnomalySegmentationAnnotation(identifier, mask_path, label_id)
+                if mask_path is None:
+                    annotation.mask = mask
+                if check_content and mask_path:
+                    if not check_file_existence(self.reference_mask_dir / mask_path):
+                        errors.append('{}: does not exist'.format(self.reference_mask_dir / mask_path))
+                annotations.append(annotation)
+            if progress_callback and idx % progress_interval == 0:
+                progress_callback(idx * 100 / num_iterations)
+        return ConverterReturn(annotations, self.get_meta(), errors)
+
+    @staticmethod
+    def get_meta():
+        return {'label_map': {0: 'good', 1: 'defect'}}
```
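
The converter assumes the standard MVTec layout: test images under `test/<defect_type>/` (with `good` meaning defect-free) and reference masks under `ground_truth/<defect_type>/<stem>_mask.png`. The mask-path derivation used in `convert()` can be checked in isolation (the `broken_large` defect type below is just an example):

```python
from pathlib import Path

# Assumed layout, relative to data_dir (category and defect names are examples):
#   test/good/000.png                      -> label 0, all-zeros mask is synthesized
#   test/broken_large/000.png              -> label 1
#   ground_truth/broken_large/000_mask.png -> reference mask
image = Path('broken_large/000.png')  # identifier relative to <data_dir>/test
mask_path = image.with_name(image.stem + '_mask' + image.suffix)
print(mask_path)  # broken_large/000_mask.png, later resolved under ground_truth/
```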

tools/accuracy_checker/openvino/tools/accuracy_checker/metrics/README.md

Lines changed: 4 additions & 2 deletions
```diff
@@ -42,11 +42,13 @@ Supported representations: `ClassificationAnnotation`, `TextClassificationAnnotation`
 * `character_recognition_accuracy` - accuracy metric for character recognition task. Metric is calculated as a percentage. Direction of metric's growth is higher-better. Supported representations: `CharacterRecognitionAnnotation`, `CharacterRecognitionPrediction`.
   * `remove_spaces` - allow removing spaces from reference and predicted strings (Optional, default - `False`).
 * `label_level_recognition_accuracy` - [label level recognition accuracy](https://dl.acm.org/doi/abs/10.1145/1143844.1143891) metric for text line character recognition task using [editdistance](https://pypi.org/project/editdistance/). Metric is calculated as a percentage. Direction of metric's growth is higher-better. Supported representations: `CharacterRecognitionAnnotation`, `CharacterRecognitionPrediction`.
-* `classification_f1-score` - [F1 score](https://en.wikipedia.org/wiki/F1_score) metric for classification task. Metric is calculated as a percentage. Direction of metric's growth is higher-better. Supported representations: `ClassificationAnnotation`, `TextClassificationAnnotation`, `ClassificationPrediction`.
+* `classification_f1-score` - [F1 score](https://en.wikipedia.org/wiki/F1_score) metric for classification task. Metric is calculated as a percentage. Direction of metric's growth is higher-better. Supported representations: `ClassificationAnnotation`, `TextClassificationAnnotation`, `ClassificationPrediction`, `ArgMaxClassificationPrediction`, `AnomalySegmentationAnnotation`, `AnomalySegmentationPrediction`.
   * `label_map` - the field in annotation metadata which contains the dataset label map (Optional, should be provided if different from default).
   * `pos_label` - class to report during metric calculation (Optional). If the argument is not specified, the metric for each class and the mean will be reported.
+  * `pixel_level` - evaluate metric on pixel level for anomaly segmentation (Optional, default `False`).
 * `metthews_correlation_coef` - [Matthews correlation coefficient (MCC)](https://en.wikipedia.org/wiki/Matthews_correlation_coefficient) for binary classification. Metric is calculated as a percentage. Direction of metric's growth is higher-better. Supported representations: `ClassificationAnnotation`, `TextClassificationAnnotation`, `ClassificationPrediction`.
-* `roc_auc_score` - [ROC AUC score](https://en.wikipedia.org/wiki/Receiver_operating_characteristic) for binary classification. Metric is calculated as a percentage. Direction of metric's growth is higher-better. Supported representations: `ClassificationAnnotation`, `TextClassificationAnnotation`, `ClassificationPrediction`, `ArgMaxClassificationPrediction`.
+* `roc_auc_score` - [ROC AUC score](https://en.wikipedia.org/wiki/Receiver_operating_characteristic) for binary classification. Metric is calculated as a percentage. Direction of metric's growth is higher-better. Supported representations: `ClassificationAnnotation`, `TextClassificationAnnotation`, `ClassificationPrediction`, `ArgMaxClassificationPrediction`, `AnomalySegmentationAnnotation`, `AnomalySegmentationPrediction`.
+  * `pixel_level` - evaluate metric on pixel level for anomaly segmentation (Optional, default `False`).
 * `acer_score` - metric for classification tasks. Can be obtained from the following formula: `ACER = (APCER + BPCER)/2 = ((fp / (tn + fp)) + (fn / (fn + tp)))/2`. For more details about metrics see section 9.3: <https://arxiv.org/abs/2007.12342>. Metric is calculated as a percentage. Direction of metric's growth is higher-worse. Supported representations: `ClassificationAnnotation`, `TextClassificationAnnotation`, `ClassificationPrediction`.
 * `clip_accuracy` - classification video-level accuracy metric. Metric is calculated as a percentage. Direction of metric's growth is higher-better. Supported representations: `ClassificationAnnotation`, `ClassificationPrediction`.
 * `map` - mean average precision. Metric is calculated as a percentage. Direction of metric's growth is higher-better. Supported representations: `DetectionAnnotation`, `DetectionPrediction`.
```
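
The difference between the default image-level evaluation and the new `pixel_level` option can be shown with a small numpy sketch (a simplified illustration of the idea, not the checker's implementation):

```python
import numpy as np

def f1_binary(y_true, y_pred):
    """Plain binary F1: 2*TP / (2*TP + FP + FN)."""
    tp = np.sum((y_true == 1) & (y_pred == 1))
    fp = np.sum((y_true == 0) & (y_pred == 1))
    fn = np.sum((y_true == 1) & (y_pred == 0))
    return 2 * tp / (2 * tp + fp + fn)

gt = [np.array([[0, 1], [0, 0]]), np.zeros((2, 2), dtype=int)]
pred = [np.array([[0, 1], [1, 0]]), np.zeros((2, 2), dtype=int)]

# Image level: collapse each mask to one anomalous/normal label per image.
image_f1 = f1_binary(np.array([m.any() for m in gt], dtype=int),
                     np.array([m.any() for m in pred], dtype=int))
# Pixel level: score every pixel of every mask.
pixel_f1 = f1_binary(np.concatenate([m.ravel() for m in gt]),
                     np.concatenate([m.ravel() for m in pred]))
print(image_f1, pixel_f1)  # 1.0 vs ~0.667: one stray pixel only hurts pixel level
```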
