AC: Added processed_image_info input type (#3298)

Anna Grebneva · web-flow · commit 1f25cd94c07c · 2022-02-25T20:02:14.000+03:00
* Added processed_image_info input type

* Updated docs
diff --git a/tools/accuracy_checker/openvino/tools/accuracy_checker/config/config_validator.py b/tools/accuracy_checker/openvino/tools/accuracy_checker/config/config_validator.py
@@ -369,7 +369,8 @@ def type(self):
 
 class InputField(BaseField):
     INPUTS_TYPES = (
-        'CONST_INPUT', 'INPUT', 'IMAGE_INFO', 'ORIG_IMAGE_INFO', 'LSTM_INPUT', 'IGNORE_INPUT', 'SCALE_FACTOR'
+        'CONST_INPUT', 'INPUT', 'IMAGE_INFO', 'ORIG_IMAGE_INFO', 'PROCESSED_IMAGE_INFO', 'LSTM_INPUT', 'IGNORE_INPUT',
+        'SCALE_FACTOR'
     )
     LAYOUT_TYPES = ('NCHW', 'NHWC', 'NCWH', 'NWHC')
     PRECISIONS = ('FP32', 'FP16', 'U8', 'U16', 'I8', 'I16', 'I32', 'I64')
diff --git a/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/caffe_launcher_readme.md b/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/caffe_launcher_readme.md
@@ -20,6 +20,7 @@ Each input description should has following info:
     * `CONST_INPUT` - input will be filled using constant provided in config. It also requires to provide `value`.
     * `IMAGE_INFO` - specific key for setting information about input shape to layer (used in Faster RCNN based topologies). You do not need to provide `value`, because it will be calculated in runtime. Format value is list with `N` elements of the form `[H, W, S]`, where `N` is batch size, `H` - original image height, `W` - original image width, `S` - scale of original image (default 1).
     * `ORIG_IMAGE_INFO` - specific key for setting information about original image size before preprocessing.
+    * `PROCESSED_IMAGE_INFO` - specific key for setting information about input size after preprocessing.
     * `SCALE_FACTOR` - specific key for setting information about image scale factor defined as `[SCALE_Y, SCALE_X]`, where `SCALE_Y` = `<resized_image_height> / <original_image_height>`, `SCALE_X` = `<resized_image_width> / <original_image_width>`
     * `INPUT` - network input for main data stream (e. g. images). If you have several data inputs, you should provide regular expression for identifier as `value` for specifying which one data should be provided in specific input.
     * `IGNORE_INPUT` - input which should stay empty during evaluation.
diff --git a/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/dlsdk_launcher_readme.md b/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/dlsdk_launcher_readme.md
@@ -56,6 +56,7 @@ Each input description should has following info:
     * `CONST_INPUT` - input will be filled using constant provided in config. It also requires to provide `value`.
     * `IMAGE_INFO` - specific key for setting information about input shape to layer (used in Faster RCNN based topologies). You do not need to provide `value`, because it will be calculated in runtime. Format value is list with `N` elements of the form `[H, W, S]`, where `N` is batch size, `H` - original image height, `W` - original image width, `S` - scale of original image (default 1).
     * `ORIG_IMAGE_INFO` - specific key for setting information about original image size before preprocessing.
+    * `PROCESSED_IMAGE_INFO` - specific key for setting information about input size after preprocessing.
     * `SCALE_FACTOR` - specific key for setting information about image scale factor defined as `[SCALE_Y, SCALE_X]`, where `SCALE_Y` = `<resized_image_height> / <original_image_height>`, `SCALE_X` = `<resized_image_width> / <original_image_width>`
     * `INPUT` - network input for main data stream (e. g. images). If you have several data inputs, you should provide regular expression for identifier as `value` for specifying which one data should be provided in specific input.
     * `LSTM_INPUT` - input which should be filled by hidden state from previous iteration. The hidden state layer name should be provided via `value` parameter.
diff --git a/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/gapi_launcher_readme.md b/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/gapi_launcher_readme.md
@@ -21,6 +21,7 @@ Each input description should has following info:
     * `CONST_INPUT` - input will be filled using constant provided in config. It also requires to provide `value`.
     * `IMAGE_INFO` - specific key for setting information about input shape to layer (used in Faster RCNN based topologies). You do not need to provide `value`, because it will be calculated in runtime. Format value is list with `N` elements of the form `[H, W, S]`, where `N` is batch size, `H` - original image height, `W` - original image width, `S` - scale of original image (default 1).
     * `ORIG_IMAGE_INFO` - specific key for setting information about original image size before preprocessing.
+    * `PROCESSED_IMAGE_INFO` - specific key for setting information about input size after preprocessing.
     * `IGNORE_INPUT` - input which should stay empty during evaluation.
     * `INPUT` - network input for main data stream (e. g. images). If you have several data inputs, you should provide regular expression for identifier as `value` for specifying which one data should be provided in specific input.
   * `shape` - shape of input layer described as comma-separated of all dimensions size except batch size.
diff --git a/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/input_feeder.py b/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/input_feeder.py
@@ -80,7 +80,8 @@
     'boolean': np.uint8
 }
 
-INPUT_TYPES_WITHOUT_VALUE = ['IMAGE_INFO', 'ORIG_IMAGE_INFO', 'IGNORE_INPUT', 'LSTM_INPUT', 'SCALE_FACTOR']
+INPUT_TYPES_WITHOUT_VALUE = ['IMAGE_INFO', 'ORIG_IMAGE_INFO', 'PROCESSED_IMAGE_INFO', 'IGNORE_INPUT', 'LSTM_INPUT',
+                             'SCALE_FACTOR']
 
 
 class InputFeeder:
@@ -116,9 +117,10 @@ def configure(self, inputs_config, precisions_list, layouts):
         if not self.dummy:
             parsing_results = self._parse_inputs_config(inputs_config, self.default_layout, precisions_list, layouts)
             self.const_inputs, self.non_constant_inputs, self.inputs_mapping = parsing_results[:3]
-            self.image_info_inputs, self.orig_image_info_inputs, self.scale_factor_inputs = parsing_results[3:6]
-            self.lstm_inputs = parsing_results[6]
-            self.ignore_inputs, self.layouts_mapping, self.precision_mapping, self.inputs_config = parsing_results[7:]
+            (self.image_info_inputs, self.orig_image_info_inputs, self.processed_image_info_inputs,
+             self.scale_factor_inputs) = parsing_results[3:7]
+            self.lstm_inputs = parsing_results[7]
+            self.ignore_inputs, self.layouts_mapping, self.precision_mapping, self.inputs_config = parsing_results[8:]
             if not self.non_constant_inputs:
                 raise ConfigError('Network should contain at least one layer for setting variable data.')
 
@@ -147,8 +149,16 @@ def prepare_scale_factor(image_meta):
                 return [[meta['scale_y'], meta['scale_x']] for meta in image_meta]
             return [[1, 1] for _ in image_meta]
 
-        meta_batch = extract_image_representations(data_representation_batch, meta_only=True)
+        data_batch, meta_batch = extract_image_representations(data_representation_batch, meta_only=False)
         image_infos = {}
+        if self.processed_image_info_inputs:
+            image_info_data = [np.shape(data) for data in data_batch]
+            image_infos = {
+                processed_image_info_input:
+                    prepare_image_info(image_info_data, processed_image_info_input, False)
+                for processed_image_info_input in self.processed_image_info_inputs
+            }
+            return image_infos
         im_info_resolved = False
         if 'image_info' in meta_batch[0]:
             image_info_data = [meta['image_info'] for meta in meta_batch]
@@ -187,7 +197,10 @@ def match_by_regex(data, identifiers, input_regex):
 
         filled_inputs = {}
         check_regex = True
-        if self.image_info_inputs or self.orig_image_info_inputs or self.scale_factor_inputs:
+        if (
+            self.image_info_inputs or self.orig_image_info_inputs or
+            self.processed_image_info_inputs or self.scale_factor_inputs
+        ):
             image_info_inputs = self._fill_image_info_inputs(data_representation_batch)
             filled_inputs = {**image_info_inputs}
         for idx, input_layer in enumerate(self.non_constant_inputs):
@@ -256,7 +269,10 @@ def match_by_regex(data, identifiers, input_regex, templates):
         filled_inputs = {}
         filled_template = {}
         check_regex = True
-        if self.image_info_inputs or self.orig_image_info_inputs or self.scale_factor_inputs:
+        if (
+            self.image_info_inputs or self.orig_image_info_inputs or
+            self.processed_image_info_inputs or self.scale_factor_inputs
+        ):
             image_info_inputs = self._fill_image_info_inputs(data_representation_batch)
             filled_inputs = {**image_info_inputs}
         for idx, input_layer in enumerate(self.non_constant_inputs):
@@ -339,6 +355,7 @@ def _parse_inputs_config(self, inputs_entry, default_layout='NCHW', precisions_l
         precisions = {}
         image_info_inputs = []
         orig_image_info_inputs = []
+        processed_image_info_inputs = []
         lstm_inputs = []
         ignore_inputs = []
         scale_factor_inputs = []
@@ -349,8 +366,8 @@ def _parse_inputs_config(self, inputs_entry, default_layout='NCHW', precisions_l
                 raise ConfigError('network does not contain input "{}"'.format(name))
             if input_['type'] in INPUT_TYPES_WITHOUT_VALUE:
                 self._configure_inputs_without_value(
-                    input_, image_info_inputs, orig_image_info_inputs, scale_factor_inputs, lstm_inputs, ignore_inputs,
-                    precision_info, precisions)
+                    input_, image_info_inputs, orig_image_info_inputs, processed_image_info_inputs, scale_factor_inputs,
+                    lstm_inputs, ignore_inputs, precision_info, precisions)
                 continue
 
             value = input_.get('value')
@@ -376,7 +393,8 @@ def _parse_inputs_config(self, inputs_entry, default_layout='NCHW', precisions_l
 
         all_config_inputs = (
             config_non_constant_inputs + list(constant_inputs.keys()) +
-            image_info_inputs + lstm_inputs + orig_image_info_inputs + ignore_inputs + scale_factor_inputs
+            image_info_inputs + lstm_inputs + orig_image_info_inputs + ignore_inputs + scale_factor_inputs +
+            processed_image_info_inputs
         )
         not_config_inputs = [input_layer for input_layer in self.network_inputs if input_layer not in all_config_inputs]
         if config_non_constant_inputs and not_config_inputs:
@@ -394,6 +412,7 @@ def _parse_inputs_config(self, inputs_entry, default_layout='NCHW', precisions_l
             non_constant_inputs_mapping or None,
             image_info_inputs,
             orig_image_info_inputs,
+            processed_image_info_inputs,
             scale_factor_inputs,
             lstm_inputs,
             ignore_inputs,
@@ -404,7 +423,7 @@ def _parse_inputs_config(self, inputs_entry, default_layout='NCHW', precisions_l
 
     def _configure_inputs_without_value(
             self, input_config, image_info_inputs,
-            orig_image_info_inputs, scale_factor_inputs, lstm_inputs, ignore_inputs,
+            orig_image_info_inputs, processed_image_info_inputs, scale_factor_inputs, lstm_inputs, ignore_inputs,
             precision_info, precisions):
         name = input_config['name']
         if input_config['type'] == 'IMAGE_INFO':
@@ -415,6 +434,10 @@ def _configure_inputs_without_value(
             orig_image_info_inputs.append(name)
             self.get_layer_precision(input_config, name, precision_info, precisions)
 
+        if input_config['type'] == 'PROCESSED_IMAGE_INFO':
+            processed_image_info_inputs.append(name)
+            self.get_layer_precision(input_config, name, precision_info, precisions)
+
         if input_config['type'] == 'SCALE_FACTOR':
             scale_factor_inputs.append(name)
             self.get_layer_precision(input_config, name, precision_info, precisions)
diff --git a/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/mxnet_launcher_readme.md b/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/mxnet_launcher_readme.md
@@ -16,6 +16,7 @@ Each input description should has following info:
     * `CONST_INPUT` - input will be filled using constant provided in config. It also requires to provide `value`.
     * `IMAGE_INFO` - specific key for setting information about input shape to layer (used in Faster RCNN based topologies). You do not need to provide `value`, because it will be calculated in runtime. Format value is list with `N` elements of the form `[H, W, S]`, where `N` is batch size, `H` - original image height, `W` - original image width, `S` - scale of original image (default 1).
     * `ORIG_IMAGE_INFO` - specific key for setting information about original image size before preprocessing.
+    * `PROCESSED_IMAGE_INFO` - specific key for setting information about input size after preprocessing.
     * `SCALE_FACTOR` - specific key for setting information about image scale factor defined as `[SCALE_Y, SCALE_X]`, where `SCALE_Y` = `<resized_image_height> / <original_image_height>`, `SCALE_X` = `<resized_image_width> / <original_image_width>`
     * `IGNORE_INPUT` - input which should stay empty during evaluation.
     * `INPUT` - network input for main data stream (e. g. images). If you have several data inputs, you should provide regular expression for identifier as `value` for specifying which one data should be provided in specific input.
diff --git a/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/onnx_runtime_launcher_readme.md b/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/onnx_runtime_launcher_readme.md
@@ -21,6 +21,7 @@ Each input description should has following info:
     * `CONST_INPUT` - input will be filled using constant provided in config. It also requires to provide `value`.
     * `IMAGE_INFO` - specific key for setting information about input shape to layer (used in Faster RCNN based topologies). You do not need to provide `value`, because it will be calculated in runtime. Format value is list with `N` elements of the form `[H, W, S]`, where `N` is batch size, `H` - original image height, `W` - original image width, `S` - scale of original image (default 1).
     * `ORIG_IMAGE_INFO` - specific key for setting information about original image size before preprocessing.
+    * `PROCESSED_IMAGE_INFO` - specific key for setting information about input size after preprocessing.
     * `SCALE_FACTOR` - specific key for setting information about image scale factor defined as `[SCALE_Y, SCALE_X]`, where `SCALE_Y` = `<resized_image_height> / <original_image_height>`, `SCALE_X` = `<resized_image_width> / <original_image_width>`
     * `IGNORE_INPUT` - input which should stay empty during evaluation.
     * `INPUT` - network input for main data stream (e. g. images). If you have several data inputs, you should provide regular expression for identifier as `value` for specifying which one data should be provided in specific input.
diff --git a/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/opencv_launcher_readme.md b/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/opencv_launcher_readme.md
@@ -15,6 +15,7 @@ Each input description should has following info:
     * `CONST_INPUT` - input will be filled using constant provided in config. It also requires to provide `value`.
     * `IMAGE_INFO` - specific key for setting information about input shape to layer (used in Faster RCNN based topologies). You do not need to provide `value`, because it will be calculated in runtime. Format value is list with `N` elements of the form `[H, W, S]`, where `N` is batch size, `H` - original image height, `W` - original image width, `S` - scale of original image (default 1).
     * `ORIG_IMAGE_INFO` - specific key for setting information about original image size before preprocessing.
+    * `PROCESSED_IMAGE_INFO` - specific key for setting information about input size after preprocessing.
     * `SCALE_FACTOR` - specific key for setting information about image scale factor defined as `[SCALE_Y, SCALE_X]`, where `SCALE_Y` = `<resized_image_height> / <original_image_height>`, `SCALE_X` = `<resized_image_width> / <original_image_width>`
     * `IGNORE_INPUT` - input which should stay empty during evaluation.
     * `INPUT` - network input for main data stream (e. g. images). If you have several data inputs, you should provide regular expression for identifier as `value` for specifying which one data should be provided in specific input.
diff --git a/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/openvino_launcher_readme.md b/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/openvino_launcher_readme.md
@@ -56,6 +56,7 @@ Each input description should has following info:
     * `CONST_INPUT` - input will be filled using constant provided in config. It also requires to provide `value`.
     * `IMAGE_INFO` - specific key for setting information about input shape to layer (used in Faster RCNN based topologies). You do not need to provide `value`, because it will be calculated in runtime. Format value is list with `N` elements of the form `[H, W, S]`, where `N` is batch size, `H` - original image height, `W` - original image width, `S` - scale of original image (default 1).
     * `ORIG_IMAGE_INFO` - specific key for setting information about original image size before preprocessing.
+    * `PROCESSED_IMAGE_INFO` - specific key for setting information about input size after preprocessing.
     * `SCALE_FACTOR` - specific key for setting information about image scale factor defined as `[SCALE_Y, SCALE_X]`, where `SCALE_Y` = `<resized_image_height> / <original_image_height>`, `SCALE_X` = `<resized_image_width> / <original_image_width>`
     * `INPUT` - network input for main data stream (e. g. images). If you have several data inputs, you should provide regular expression for identifier as `value` for specifying which one data should be provided in specific input.
     * `LSTM_INPUT` - input which should be filled by hidden state from previous iteration. The hidden state layer name should be provided via `value` parameter.
diff --git a/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/pdpd_launcher_readme.md b/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/pdpd_launcher_readme.md
@@ -18,6 +18,7 @@ Each input description should has following info:
     * `CONST_INPUT` - input will be filled using constant provided in config. It also requires to provide `value`.
     * `IMAGE_INFO` - specific key for setting information about input shape to layer (used in Faster RCNN based topologies). You do not need to provide `value`, because it will be calculated in runtime. Format value is list with `N` elements of the form `[H, W, S]`, where `N` is batch size, `H` - original image height, `W` - original image width, `S` - scale of original image (default 1).
     * `ORIG_IMAGE_INFO` - specific key for setting information about original image size before preprocessing.
+    * `PROCESSED_IMAGE_INFO` - specific key for setting information about input size after preprocessing.
     * `SCALE_FACTOR` - specific key for setting information about image scale factor defined as `[SCALE_Y, SCALE_X]`, where `SCALE_Y` = `<resized_image_height> / <original_image_height>`, `SCALE_X` = `<resized_image_width> / <original_image_width>`
     * `IGNORE_INPUT` - input which should stay empty during evaluation.
     * `INPUT` - network input for main data stream (e. g. images). If you have several data inputs, you should provide regular expression for identifier as `value` for specifying which one data should be provided in specific input.
diff --git a/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/pytorch_launcher_readme.md b/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/pytorch_launcher_readme.md
@@ -20,6 +20,7 @@ Each input description should has following info:
     * `CONST_INPUT` - input will be filled using constant provided in config. It also requires to provide `value`.
     * `IMAGE_INFO` - specific key for setting information about input shape to layer (used in Faster RCNN based topologies). You do not need to provide `value`, because it will be calculated in runtime. Format value is list with `N` elements of the form `[H, W, S]`, where `N` is batch size, `H` - original image height, `W` - original image width, `S` - scale of original image (default 1).
     * `ORIG_IMAGE_INFO` - specific key for setting information about original image size before preprocessing.
+    * `PROCESSED_IMAGE_INFO` - specific key for setting information about input size after preprocessing.
     * `SCALE_FACTOR` - specific key for setting information about image scale factor defined as `[SCALE_Y, SCALE_X]`, where `SCALE_Y` = `<resized_image_height> / <original_image_height>`, `SCALE_X` = `<resized_image_width> / <original_image_width>`
     * `IGNORE_INPUT` - input which should stay empty during evaluation.
     * `INPUT` - network input for main data stream (e. g. images). If you have several data inputs, you should provide regular expression for identifier as `value` for specifying which one data should be provided in specific input.
diff --git a/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/tf2_launcher_readme.md b/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/tf2_launcher_readme.md
diff --git a/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/tf_launcher_readme.md b/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/tf_launcher_readme.md
diff --git a/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/tf_lite_launcher_readme.md b/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/tf_lite_launcher_readme.md

Original file line number	Diff line number	Diff line change
`@@ -369,7 +369,8 @@ def type(self):`
`369`	`369`
`370`	`370`	`class InputField(BaseField):`
`371`	`371`	`INPUTS_TYPES = (`
`372`		`- 'CONST_INPUT', 'INPUT', 'IMAGE_INFO', 'ORIG_IMAGE_INFO', 'LSTM_INPUT', 'IGNORE_INPUT', 'SCALE_FACTOR'`
	`372`	`+ 'CONST_INPUT', 'INPUT', 'IMAGE_INFO', 'ORIG_IMAGE_INFO', 'PROCESSED_IMAGE_INFO', 'LSTM_INPUT', 'IGNORE_INPUT',`
	`373`	`+ 'SCALE_FACTOR'`
`373`	`374`	`)`
`374`	`375`	`LAYOUT_TYPES = ('NCHW', 'NHWC', 'NCWH', 'NWHC')`
`375`	`376`	`PRECISIONS = ('FP32', 'FP16', 'U8', 'U16', 'I8', 'I16', 'I32', 'I64')`