Skip to content

Commit 626c8f5

Browse files
authored
Add support for IFRNet (#3868)
* Add support for IFRNet
* Update vimeo90k.py: use ImageProcessingAnnotation instead of SuperResolutionAnnotation
* Avoid repeated reading of annotated image
* Update vimeo90k.py
* Remove spaces in image_processing.py
* Update documentation and PR comments
* Correct class name in vimeo90k.py
* Fix pylint errors
* Fix line too long
1 parent d7fd1e1 commit 626c8f5

File tree

7 files changed

+59
-9
lines changed

7 files changed

+59
-9
lines changed

tools/accuracy_checker/openvino/tools/accuracy_checker/annotation_converters/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -750,6 +750,8 @@ The main difference between this converter and `super_resolution` in data organi
750750
* `vimeo90k` - converts Vimeo-90K dataset for a systematic evaluation of video processing algorithms to `SuperResolutionAnnotation`.
751751
* `annotation_file` - path to text file with list of dataset setuplets included in test.
752752
* `add_flow` - allows annotation of flow data (optional, default `False`).
753+
* `vimeo90k_interp` - converts Vimeo-90K intermediate frame interpolation dataset for a systematic evaluation of video processing algorithms to `ImageProcessingAnnotation`.
754+
* `annotation_file` - path to text file with list of dataset setuplets included in test.
753755
* `kaldi_asr_data` - converts preprocessed Kaldi\* features dataset to `CharacterRecognitionAnnotation`.
754756
* `annotation_file` - file with gt transcription table.
755757
* `data_dir` - directory with ark files.

tools/accuracy_checker/openvino/tools/accuracy_checker/annotation_converters/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@
110110
from .background_matting import BackgroundMattingConverter, VideoBackgroundMatting
111111
from .tacotron2_test_data_converter import TacotronDataConverter
112112
from .noise_suppression_dataset import NoiseSuppressionDatasetConverter
113-
from .vimeo90k_sr import Vimeo90KSuperResolutionDatasetConverter
113+
from .vimeo90k import Vimeo90KSuperResolutionDatasetConverter, Vimeo90KIntermediateFrameDatasetConverter
114114
from .lmdb import LMDBConverter
115115
from .electricity_time_series_forecasting import ElectricityTimeSeriesForecastingConverter
116116
from .kaldi_speech_recognition_pipeline import KaldiSpeechRecognitionDataConverter, KaldiFeatureRegressionConverter
@@ -239,6 +239,7 @@
239239
'TacotronDataConverter',
240240
'NoiseSuppressionDatasetConverter',
241241
'Vimeo90KSuperResolutionDatasetConverter',
242+
'Vimeo90KIntermediateFrameDatasetConverter',
242243
'LMDBConverter',
243244
'ElectricityTimeSeriesForecastingConverter',
244245
'KaldiSpeechRecognitionDataConverter',

tools/accuracy_checker/openvino/tools/accuracy_checker/annotation_converters/vimeo90k_sr.py renamed to tools/accuracy_checker/openvino/tools/accuracy_checker/annotation_converters/vimeo90k.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from .format_converter import BaseFormatConverter, ConverterReturn
1818
from ..data_readers import MultiFramesInputIdentifier
1919
from ..config import PathField, BoolField
20-
from ..representation import SuperResolutionAnnotation
20+
from ..representation import SuperResolutionAnnotation, ImageProcessingAnnotation
2121
from ..utils import read_txt
2222

2323

@@ -50,3 +50,30 @@ def convert(self, check_content=False, progress_callback=None, progress_interval
5050
annotations.append(SuperResolutionAnnotation(
5151
MultiFramesInputIdentifier(list(range(len(input_data))), input_data), target))
5252
return ConverterReturn(annotations, None, None)
53+
54+
class Vimeo90KIntermediateFrameDatasetConverter(BaseFormatConverter):
55+
__provider__ = 'vimeo90k_interp'
56+
57+
@classmethod
58+
def parameters(cls):
59+
params = super().parameters()
60+
params.update({
61+
'annotation_file': PathField(description='testing split file'),
62+
})
63+
return params
64+
65+
def configure(self):
66+
self.annotation_file = self.get_value_from_config('annotation_file')
67+
68+
def convert(self, check_content=False, progress_callback=None, progress_interval=100, **kwargs):
69+
test_set = read_txt(self.annotation_file)
70+
annotations = []
71+
for sept in test_set:
72+
target = 'target/{}/im2.png'.format(sept)
73+
input0 = 'input/{}/im1.png'.format(sept)
74+
input1 = 'input/{}/im3.png'.format(sept)
75+
input_data = [ input0, input1 ]
76+
annotations.append(ImageProcessingAnnotation(
77+
MultiFramesInputIdentifier(list(range(len(input_data))), input_data), target)
78+
)
79+
return ConverterReturn(annotations, None, None)

tools/accuracy_checker/openvino/tools/accuracy_checker/metrics/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,8 @@ More detailed information about calculation segmentation metrics you can find [h
152152
* `psnr` - [Peak signal to noise ratio](https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio). Metric is calculated as a decibel(dB). Direction of metric's mean growth is higher-better. Direction of metric's std and max_error growth is higher-worse. Supported representations: `SuperResolutionAnnotation`, `SuperResolutionPrediction`, `ImageProcessingAnnotation`, `ImageProcessingPrediction`, `ImageInpaintingAnnotation`, `ImageInpaintingPrediction`.
153153
* `color_order` - the field specified which color order `BGR` or `RGB` will be used during metric calculation (Optional. Default value is RGB), used only if you have 3-channel images.
154154
* `normalized_images` - whether the images are normalized in [0, 1] range or not. Optional, default `False`.
155+
* `scale_border` - Scale border - the number of pixels to crop from the height and width of the image. Optional, default value 4.
156+
* `unweighted_average` - whether the metric is calculated as for a grayscale image or not (3-channel images by default use a weighted average of the R, G, B channels). Optional, default value `False`.
155157
* `psnr-b` - [Peak signal to noise ratio with blocked effect factor](https://link.springer.com/chapter/10.1007/978-3-642-34595-1_16). Metric is calculated as a decibel(dB). Direction of metric's mean growth is higher-better. Direction of metric's std and max_error growth is higher-worse. Supported representations: `SuperResolutionAnnotation`, `SuperResolutionPrediction`, `ImageProcessingAnnotation`, `ImageProcessingPrediction`, `ImageInpaintingAnnotation`, `ImageInpaintingPrediction`.
156158
* `color_order` - the field specified which color order `BGR` or `RGB` will be used during metric calculation (Optional. Default value is RGB), used only if you have 3-channel images.
157159
* `normalized_images` - whether the images are normalized in [0, 1] range or not. Optional, default `False`.

tools/accuracy_checker/openvino/tools/accuracy_checker/metrics/image_quality_assessment.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
except ImportError as import_err:
3232
convolve2d = UnsupportedPackage('scipy', import_err)
3333

34+
3435
def _ssim(annotation_image, prediction_image):
3536
prediction = np.asarray(prediction_image)
3637
ground_truth = np.asarray(annotation_image)
@@ -78,13 +79,21 @@ def parameters(cls):
7879
parameters = super().parameters()
7980
parameters.update({
8081
'scale_border': NumberField(
81-
optional=True, min_value=0, default=4, description="Scale border.", value_type=int
82+
optional=True, min_value=0, default=4,
83+
description="Scale border - the number of pixels to crop from the height and width of the image.",
84+
value_type=int
8285
),
8386
'color_order': StringField(
8487
optional=True, choices=['BGR', 'RGB'], default='RGB',
8588
description="The field specified which color order BGR or RGB will be used during metric calculation."
8689
),
87-
'normalized_images': BoolField(optional=True, default=False, description='images in [0, 1] range or not')
90+
'normalized_images': BoolField(
91+
optional=True, default=False, description='images in [0, 1] range or not'),
92+
'unweighted_average': BoolField(
93+
optional=True, default=False, description="calculate metric as for grayscale image or not"
94+
" (3-channel images by default use weighted average"
95+
" of R, G, B channels)."
96+
)
8897
})
8998

9099
return parameters
@@ -96,13 +105,15 @@ def configure(self):
96105
super().configure()
97106
self.scale_border = self.get_value_from_config('scale_border')
98107
self.color_order = self.get_value_from_config('color_order')
108+
self.unweighted_average = self.get_value_from_config('unweighted_average')
99109
channel_order = {
100110
'BGR': [2, 1, 0],
101111
'RGB': [0, 1, 2],
102112
}
103113
self.channel_order = channel_order[self.color_order]
104114
self.normalized_images = self.get_value_from_config('normalized_images')
105115
self.color_scale = 255 if not self.normalized_images else 1
116+
self.color_scale = 255 if not self.normalized_images else 1
106117

107118
def _psnr_differ(self, annotation_image, prediction_image):
108119
prediction = np.squeeze(np.asarray(prediction_image)).astype(float)
@@ -123,19 +134,19 @@ def _psnr_differ(self, annotation_image, prediction_image):
123134
cv2.COLOR_BGR2GRAY if self.color_order == 'BGR' else cv2.COLOR_RGB2GRAY
124135
).astype(float)
125136
image_difference = (prediction - ground_truth) / self.color_scale
126-
if len(ground_truth.shape) == 3 and ground_truth.shape[2] == 3:
137+
if len(ground_truth.shape) == 3 and ground_truth.shape[2] == 3 and not self.unweighted_average:
127138
r_channel_diff = image_difference[:, :, self.channel_order[0]]
128139
g_channel_diff = image_difference[:, :, self.channel_order[1]]
129140
b_channel_diff = image_difference[:, :, self.channel_order[2]]
130141

131142
channels_diff = (r_channel_diff * 65.738 + g_channel_diff * 129.057 + b_channel_diff * 25.064) / 256
132143

133144
mse = np.mean(channels_diff ** 2)
134-
if mse == 0:
135-
return np.Infinity
136145
else:
137146
mse = np.mean(image_difference ** 2)
138147

148+
if mse == 0:
149+
return np.Infinity
139150
return -10 * math.log10(mse)
140151

141152
@classmethod

tools/accuracy_checker/openvino/tools/accuracy_checker/preprocessor/geometric_transformations.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -770,7 +770,13 @@ def configure(self):
770770
self.axes = self.get_value_from_config('axes')
771771

772772
def process(self, image, annotation_meta=None):
773-
image.data = np.transpose(image.data, self.axes)
773+
def process_data(data):
774+
return np.transpose(data, self.axes)
775+
776+
image.data = process_data(image.data) if not isinstance(image.data, list) else [
777+
process_data(data_fragment) for data_fragment in image.data
778+
]
779+
774780
return image
775781

776782
@property

tools/accuracy_checker/openvino/tools/accuracy_checker/representation/image_processing.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,8 @@ def value(self):
7070
if self._gt_loader == self.LOADERS[GTLoader.PILLOW]:
7171
loader.convert_to_rgb = self._pillow_to_rgb if hasattr(self, '_pillow_to_rgb') else False
7272
gt = loader.read(self._image_path)
73-
return gt.astype(np.uint8) if self._gt_loader not in ['dicom_reader', 'rawpy', 'numpy_reader'] else gt
73+
self._value = gt.astype(np.uint8) if self._gt_loader not in ['dicom_reader', 'rawpy',
74+
'numpy_reader'] else gt
7475
return self._value
7576

7677
@value.setter

0 commit comments

Comments
 (0)