Skip to content

Commit 626c8f5

Browse files
authored
Add support for IFRNet (#3868)
* Add support for IFRNet
* Update vimeo90k.py: use ImageProcessingAnnotation instead of SuperResolutionAnnotation
* Avoid repeated reading of annotated image
* Update vimeo90k.py
* Remove spaces in image_processing.py
* Update documentation and PR comments
* Correct class name in vimeo90k.py
* Fix pylint errors
* Fix line too long
1 parent d7fd1e1 commit 626c8f5

File tree

7 files changed

+59
-9
lines changed

7 files changed

+59
-9
lines changed

tools/accuracy_checker/openvino/tools/accuracy_checker/annotation_converters/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -750,6 +750,8 @@ The main difference between this converter and `super_resolution` in data organi
750750
* `vimeo90k` - converts Vimeo-90K dataset for a systematic evaluation of video processing algorithms to `SuperResolutionAnnotation`.
751751
* `annotation_file` - path to text file with list of dataset setuplets included in test.
752752
* `add_flow` - allows annotation of flow data (optional, default `False`).
753+
* `vimeo90k_interp` - converts Vimeo-90K intermediate frame interpolation dataset for a systematic evaluation of video processing algorithms to `ImageProcessingAnnotation`.
754+
* `annotation_file` - path to text file with list of dataset setuplets included in test.
753755
* `kaldi_asr_data` - converts preprocessed Kaldi\* features dataset to `CharacterRecognitionAnnotation`.
754756
* `annotation_file` - file with gt transcription table.
755757
* `data_dir` - directory with ark files.

tools/accuracy_checker/openvino/tools/accuracy_checker/annotation_converters/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@
110110
from .background_matting import BackgroundMattingConverter, VideoBackgroundMatting
111111
from .tacotron2_test_data_converter import TacotronDataConverter
112112
from .noise_suppression_dataset import NoiseSuppressionDatasetConverter
113-
from .vimeo90k_sr import Vimeo90KSuperResolutionDatasetConverter
113+
from .vimeo90k import Vimeo90KSuperResolutionDatasetConverter, Vimeo90KIntermediateFrameDatasetConverter
114114
from .lmdb import LMDBConverter
115115
from .electricity_time_series_forecasting import ElectricityTimeSeriesForecastingConverter
116116
from .kaldi_speech_recognition_pipeline import KaldiSpeechRecognitionDataConverter, KaldiFeatureRegressionConverter
@@ -239,6 +239,7 @@
239239
'TacotronDataConverter',
240240
'NoiseSuppressionDatasetConverter',
241241
'Vimeo90KSuperResolutionDatasetConverter',
242+
'Vimeo90KIntermediateFrameDatasetConverter',
242243
'LMDBConverter',
243244
'ElectricityTimeSeriesForecastingConverter',
244245
'KaldiSpeechRecognitionDataConverter',

tools/accuracy_checker/openvino/tools/accuracy_checker/annotation_converters/vimeo90k_sr.py renamed to tools/accuracy_checker/openvino/tools/accuracy_checker/annotation_converters/vimeo90k.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from .format_converter import BaseFormatConverter, ConverterReturn
1818
from ..data_readers import MultiFramesInputIdentifier
1919
from ..config import PathField, BoolField
20-
from ..representation import SuperResolutionAnnotation
20+
from ..representation import SuperResolutionAnnotation, ImageProcessingAnnotation
2121
from ..utils import read_txt
2222

2323

@@ -50,3 +50,30 @@ def convert(self, check_content=False, progress_callback=None, progress_interval
5050
annotations.append(SuperResolutionAnnotation(
5151
MultiFramesInputIdentifier(list(range(len(input_data))), input_data), target))
5252
return ConverterReturn(annotations, None, None)
53+
54+
class Vimeo90KIntermediateFrameDatasetConverter(BaseFormatConverter):
55+
__provider__ = 'vimeo90k_interp'
56+
57+
@classmethod
58+
def parameters(cls):
59+
params = super().parameters()
60+
params.update({
61+
'annotation_file': PathField(description='testing split file'),
62+
})
63+
return params
64+
65+
def configure(self):
66+
self.annotation_file = self.get_value_from_config('annotation_file')
67+
68+
def convert(self, check_content=False, progress_callback=None, progress_interval=100, **kwargs):
69+
test_set = read_txt(self.annotation_file)
70+
annotations = []
71+
for sept in test_set:
72+
target = 'target/{}/im2.png'.format(sept)
73+
input0 = 'input/{}/im1.png'.format(sept)
74+
input1 = 'input/{}/im3.png'.format(sept)
75+
input_data = [ input0, input1 ]
76+
annotations.append(ImageProcessingAnnotation(
77+
MultiFramesInputIdentifier(list(range(len(input_data))), input_data), target)
78+
)
79+
return ConverterReturn(annotations, None, None)

tools/accuracy_checker/openvino/tools/accuracy_checker/metrics/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,8 @@ More detailed information about calculation segmentation metrics you can find [h
152152
* `psnr` - [Peak signal to noise ratio](https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio). Metric is calculated as a decibel(dB). Direction of metric's mean growth is higher-better. Direction of metric's std and max_error growth is higher-worse. Supported representations: `SuperResolutionAnnotation`, `SuperResolutionPrediction`, `ImageProcessingAnnotation`, `ImageProcessingPrediction`, `ImageInpaintingAnnotation`, `ImageInpaintingPrediction`.
153153
* `color_order` - the field specified which color order `BGR` or `RGB` will be used during metric calculation (Optional. Default value is RGB), used only if you have 3-channel images.
154154
* `normalized_images` - whether the images are normalized in [0, 1] range or not. Optional, default `False`.
155+
* `scale_border` - Scale border - the number of pixels to crop from the height and width of the image. Optional, default value 4.
156+
* `unweighted_average` - whether the metric is calculated as for a grayscale image or not (3-channel images by default use a weighted average of the R, G, B channels). Optional, default value `False`.
155157
* `psnr-b` - [Peak signal to noise ratio with blocked effect factor](https://link.springer.com/chapter/10.1007/978-3-642-34595-1_16). Metric is calculated as a decibel(dB). Direction of metric's mean growth is higher-better. Direction of metric's std and max_error growth is higher-worse. Supported representations: `SuperResolutionAnnotation`, `SuperResolutionPrediction`, `ImageProcessingAnnotation`, `ImageProcessingPrediction`, `ImageInpaintingAnnotation`, `ImageInpaintingPrediction`.
156158
* `color_order` - the field specified which color order `BGR` or `RGB` will be used during metric calculation (Optional. Default value is RGB), used only if you have 3-channel images.
157159
* `normalized_images` - whether the images are normalized in [0, 1] range or not. Optional, default `False`.

tools/accuracy_checker/openvino/tools/accuracy_checker/metrics/image_quality_assessment.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
except ImportError as import_err:
3232
convolve2d = UnsupportedPackage('scipy', import_err)
3333

34+
3435
def _ssim(annotation_image, prediction_image):
3536
prediction = np.asarray(prediction_image)
3637
ground_truth = np.asarray(annotation_image)
@@ -78,13 +79,21 @@ def parameters(cls):
7879
parameters = super().parameters()
7980
parameters.update({
8081
'scale_border': NumberField(
81-
optional=True, min_value=0, default=4, description="Scale border.", value_type=int
82+
optional=True, min_value=0, default=4,
83+
description="Scale border - the number of pixels to crop from the height and width of the image.",
84+
value_type=int
8285
),
8386
'color_order': StringField(
8487
optional=True, choices=['BGR', 'RGB'], default='RGB',
8588
description="The field specified which color order BGR or RGB will be used during metric calculation."
8689
),
87-
'normalized_images': BoolField(optional=True, default=False, description='images in [0, 1] range or not')
90+
'normalized_images': BoolField(
91+
optional=True, default=False, description='images in [0, 1] range or not'),
92+
'unweighted_average': BoolField(
93+
optional=True, default=False, description="calculate metric as for grayscale image or not"
94+
" (3-channel images by default use weighted average"
95+
" of R, G, B channels)."
96+
)
8897
})
8998

9099
return parameters
@@ -96,13 +105,15 @@ def configure(self):
96105
super().configure()
97106
self.scale_border = self.get_value_from_config('scale_border')
98107
self.color_order = self.get_value_from_config('color_order')
108+
self.unweighted_average = self.get_value_from_config('unweighted_average')
99109
channel_order = {
100110
'BGR': [2, 1, 0],
101111
'RGB': [0, 1, 2],
102112
}
103113
self.channel_order = channel_order[self.color_order]
104114
self.normalized_images = self.get_value_from_config('normalized_images')
105115
self.color_scale = 255 if not self.normalized_images else 1
116+
self.color_scale = 255 if not self.normalized_images else 1
106117

107118
def _psnr_differ(self, annotation_image, prediction_image):
108119
prediction = np.squeeze(np.asarray(prediction_image)).astype(float)
@@ -123,19 +134,19 @@ def _psnr_differ(self, annotation_image, prediction_image):
123134
cv2.COLOR_BGR2GRAY if self.color_order == 'BGR' else cv2.COLOR_RGB2GRAY
124135
).astype(float)
125136
image_difference = (prediction - ground_truth) / self.color_scale
126-
if len(ground_truth.shape) == 3 and ground_truth.shape[2] == 3:
137+
if len(ground_truth.shape) == 3 and ground_truth.shape[2] == 3 and not self.unweighted_average:
127138
r_channel_diff = image_difference[:, :, self.channel_order[0]]
128139
g_channel_diff = image_difference[:, :, self.channel_order[1]]
129140
b_channel_diff = image_difference[:, :, self.channel_order[2]]
130141

131142
channels_diff = (r_channel_diff * 65.738 + g_channel_diff * 129.057 + b_channel_diff * 25.064) / 256
132143

133144
mse = np.mean(channels_diff ** 2)
134-
if mse == 0:
135-
return np.Infinity
136145
else:
137146
mse = np.mean(image_difference ** 2)
138147

148+
if mse == 0:
149+
return np.Infinity
139150
return -10 * math.log10(mse)
140151

141152
@classmethod

tools/accuracy_checker/openvino/tools/accuracy_checker/preprocessor/geometric_transformations.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -770,7 +770,13 @@ def configure(self):
770770
self.axes = self.get_value_from_config('axes')
771771

772772
def process(self, image, annotation_meta=None):
773-
image.data = np.transpose(image.data, self.axes)
773+
def process_data(data):
774+
return np.transpose(data, self.axes)
775+
776+
image.data = process_data(image.data) if not isinstance(image.data, list) else [
777+
process_data(data_fragment) for data_fragment in image.data
778+
]
779+
774780
return image
775781

776782
@property

tools/accuracy_checker/openvino/tools/accuracy_checker/representation/image_processing.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,8 @@ def value(self):
7070
if self._gt_loader == self.LOADERS[GTLoader.PILLOW]:
7171
loader.convert_to_rgb = self._pillow_to_rgb if hasattr(self, '_pillow_to_rgb') else False
7272
gt = loader.read(self._image_path)
73-
return gt.astype(np.uint8) if self._gt_loader not in ['dicom_reader', 'rawpy', 'numpy_reader'] else gt
73+
self._value = gt.astype(np.uint8) if self._gt_loader not in ['dicom_reader', 'rawpy',
74+
'numpy_reader'] else gt
7475
return self._value
7576

7677
@value.setter

0 commit comments

Comments
 (0)