Commit fb41315

Add sub evaluation with subset metrics (#3892)
1 parent 8c9bc0d commit fb41315

7 files changed, +176 -21 lines changed

tools/accuracy_checker/README.md

Lines changed: 38 additions & 1 deletion
@@ -161,6 +161,7 @@ Use `-h, --help` to get the full list of command-line options. Some arguments ar
 - `--shuffle` allows shuffle annotation during creation a subset if subsample_size argument is provided. Default is `True`.
 - `--intermediate_metrics_results` enables intermediate metrics results printing. Default is `False`
 - `--metrics_interval` number of iterations for updated metrics result printing if `--intermediate_metrics_results` flag enabled. Default is 1000.
+- `--sub_evaluation` enables evaluation of a dataset subset with predefined `subset_metrics`. Default is `False`. See [Sub evaluation with subset metrics](#Sub-evaluation-with-subset-metrics).
 
 You are also able to replace some command-line arguments with the environment variables for path prefixing. Supported list of variables includes:
 * `DEFINITIONS_FILE` - equivalent of `-d`, `-definitions`.
@@ -248,9 +249,13 @@ You can convert annotation in-place using:
 
 or use existing annotation file and dataset meta:
 - `annotation` - path to annotation file, you must **convert annotation to representation of dataset problem first**, you may choose one of the converters from *annotation-converters* if there is already converter for your dataset or write your own.
-- `dataset_meta`: path to metadata file (generated by converter).
+- `dataset_meta` - path to metadata file (generated by converter).
 More detailed information about annotation conversion you can find in [Annotation Conversion Guide](openvino/tools/accuracy_checker/annotation_converters/README.md).
 
+- `subset_metrics` - list of dataset subsets with unique size and metrics, computed if the `--sub_evaluation` flag is enabled. If `subsample_size` is defined, only the subset with a matching `subset_size` is evaluated; otherwise the first subset is validated by default. See [Sub evaluation with subset metrics](#Sub-evaluation-with-subset-metrics).
+- `subset_size` - size of the dataset subset to evaluate; its value is compared with `subsample_size` to select the desired subset for evaluation.
+- `metrics` - list of metrics specific to the defined subset size
+
 Example of dataset definition:
 
 ```yaml
@@ -303,6 +308,38 @@ metrics:
     threshold: 0.005
 ```
 
+#### Sub-evaluation with subset metrics
+
+You may optionally enable the `sub_evaluation` flag to quickly get results for a subset of a big dataset.
+The `subset_metrics` list needs to provide subsets with different `subset_size` and `metrics`.
+If `subset_metrics` consists of several entries, you may use the `subsample_size` value to select the desired `subset_size`; otherwise the first defined `subset_size` will be used.
+
+Note: Enabling the `sub_evaluation` flag has no effect when the accuracy config has no `subset_metrics` defined.
+
+Example:
+
+```yaml
+metrics:
+  - type: accuracy
+    top_k: 5
+    reference: 86.43
+subset_metrics:
+  - subset_size: "10%"
+    metrics:
+      - type: accuracy
+        top_k: 5
+        reference: 86.13
+  - subset_size: "20%"
+    metrics:
+      - type: accuracy
+        top_k: 5
+        reference: 86.23
+        top_k: 1
+        reference: 76.42
+```
+
+
+
 ### Testing New Models
 
 Typical workflow for testing a new model includes:
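For readers skimming the diff, here is a minimal Python sketch (not part of the commit) of the selection rule the README section above describes. The dictionary simply mirrors the YAML example, and `pick_subset_entry` is an illustrative name, not a function that exists in the tool:

```python
# Illustrative sketch of the documented rule, not the tool's implementation.
dataset_config = {
    "sub_evaluation": True,          # set via the --sub_evaluation command-line flag
    "subsample_size": "20%",         # optional; used to pick a specific subset entry
    "metrics": [{"type": "accuracy", "top_k": 5, "reference": 86.43}],
    "subset_metrics": [
        {"subset_size": "10%", "metrics": [{"type": "accuracy", "top_k": 5, "reference": 86.13}]},
        {"subset_size": "20%", "metrics": [{"type": "accuracy", "top_k": 5, "reference": 86.23}]},
    ],
}

def pick_subset_entry(config):
    """Return the subset_metrics entry selected under the documented rule, or None."""
    if not config.get("sub_evaluation"):
        return None                  # flag disabled: top-level metrics are used as before
    wanted = config.get("subsample_size")
    for entry in config.get("subset_metrics", []):
        if wanted is None or entry.get("subset_size") == wanted:
            return entry             # first entry, or the one whose subset_size matches
    return None                      # no match: evaluation falls back to top-level metrics

print(pick_subset_entry(dataset_config)["subset_size"])  # -> 20%
```

With `subsample_size` omitted, the `10%` entry would be picked instead.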

tools/accuracy_checker/openvino/tools/accuracy_checker/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -14,4 +14,4 @@
 limitations under the License.
 """
 
-__version__ = "0.9.5"
+__version__ = "0.9.6"

tools/accuracy_checker/openvino/tools/accuracy_checker/argparser.py

Lines changed: 7 additions & 0 deletions
@@ -129,6 +129,13 @@ def add_dataset_related_args(parser):
         help='file name for saving or reading identifiers subset',
         required=False
     )
+    dataset_related_args.add_argument(
+        '--sub_evaluation',
+        help='attempt to use subset size and metrics for sub evaluation',
+        type=cast_to_bool,
+        default=False,
+        required=False
+    )
 
 
 def add_profiling_related_args(parser):
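For context, a self-contained sketch of how such a boolean-cast argument behaves; `cast_to_bool` below is a simplified stand-in for the project's helper (whose actual implementation is not shown in this diff):

```python
import argparse

def cast_to_bool(value):
    # Simplified stand-in: accept common textual spellings of True/False.
    if isinstance(value, bool):
        return value
    return value.strip().lower() in ('1', 't', 'true', 'yes')

parser = argparse.ArgumentParser()
parser.add_argument(
    '--sub_evaluation',
    help='attempt to use subset size and metrics for sub evaluation',
    type=cast_to_bool,
    default=False,
    required=False
)

print(parser.parse_args([]).sub_evaluation)                             # False (default)
print(parser.parse_args(['--sub_evaluation', 'True']).sub_evaluation)   # True
print(parser.parse_args(['--sub_evaluation', '0']).sub_evaluation)      # False
```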

tools/accuracy_checker/openvino/tools/accuracy_checker/config/config_reader.py

Lines changed: 2 additions & 0 deletions
@@ -881,6 +881,8 @@ def _add_subset_specific_arg(dataset_entry, arguments):
         dataset_entry['subset_file'] = arguments.subset_file
     if 'store_subset' in arguments and arguments.store_subset:
         dataset_entry['store_subset'] = arguments.store_subset
+    if 'sub_evaluation' in arguments and arguments.sub_evaluation:
+        dataset_entry['sub_evaluation'] = arguments.sub_evaluation
 
 
 def prepare_commandline_conversion_mapping(commandline_conversion, args):
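A small sketch of what this change does at config-assembly time. The function below mirrors the new branch rather than importing the private helper, and assumes `arguments` is an `argparse.Namespace` (which supports `in` checks):

```python
from argparse import Namespace

def add_sub_evaluation_arg(dataset_entry, arguments):
    # Mirrors the branch added to _add_subset_specific_arg above.
    if 'sub_evaluation' in arguments and arguments.sub_evaluation:
        dataset_entry['sub_evaluation'] = arguments.sub_evaluation

entry = {}
add_sub_evaluation_arg(entry, Namespace(sub_evaluation=True))
print(entry)   # {'sub_evaluation': True}

entry = {}
add_sub_evaluation_arg(entry, Namespace(sub_evaluation=False))
print(entry)   # {}  (the key is only written when the flag is truthy)
```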

tools/accuracy_checker/openvino/tools/accuracy_checker/dataset.py

Lines changed: 45 additions & 14 deletions
@@ -111,6 +111,13 @@ def parameters(cls):
                 optional=True, default=False,
                 description='save subset ids to file specified in subset_file parameter'
             ),
+            'subset_metrics': ListField(
+                allow_empty=False, optional=True, description='list of metrics for subset evaluation'
+            ),
+            'sub_evaluation': BoolField(
+                optional=True, default=False,
+                description='if subset_metrics defined, use its subset size and metrics for evaluation'
+            ),
             'batch': NumberField(value_type=int, min_value=1, optional=True, description='batch size for data read'),
             '_profile': BoolField(optional=True, default=False, description='allow metric profiling'),
             '_report_type': StringField(optional=True, choices=['json', 'csv'], description='type profiling report'),
@@ -171,15 +178,23 @@ def _save_annotation():
 
         if not annotation:
             raise ConfigError('path to converted annotation or data for conversion should be specified')
+
+        sub_evaluation = config.get('sub_evaluation', False) and not ignore_subset_settings(config)
+
+        if sub_evaluation:
+            if use_converted_annotation and contains_all(config, ['annotation', 'annotation_conversion']):
+                _save_annotation()
+
         no_recursion = (meta or {}).get('no_recursion', False)
         annotation = _create_subset(annotation, config, no_recursion)
         dataset_analysis = config.get('analyze_dataset', False)
 
         if dataset_analysis:
             meta = _run_dataset_analysis(meta)
 
-        if use_converted_annotation and contains_all(config, ['annotation', 'annotation_conversion']):
-            _save_annotation()
+        if not sub_evaluation:
+            if use_converted_annotation and contains_all(config, ['annotation', 'annotation_conversion']):
+                _save_annotation()
 
         return annotation, meta
@@ -198,7 +213,7 @@ def send_annotation_info(self, config):
             'dataset_size': self.size
         }
         convert_annotation = True
-        subsample_size = config.get('subsample_size')
+        subsample_size = get_subsample_size(config)
         subsample_meta = {'subset': False, 'shuffle': False}
         if not ignore_subset_settings(config):
@@ -767,7 +782,7 @@ def provide_data_info(self, annotations, progress_reporter=None):
         return annotations
 
     def set_annotation(self, annotation, meta):
-        subsample_size = self.dataset_config.get('subsample_size')
+        subsample_size = get_subsample_size(self.dataset_config)
         if subsample_size is not None:
             subsample_seed = self.dataset_config.get('subsample_seed', 666)
@@ -791,7 +806,7 @@ def send_annotation_info(self, config):
             'annotation_saving': False,
             'dataset_size': self.size
         }
-        subsample_size = config.get('subsample_size')
+        subsample_size = get_subsample_size(config)
         subsample_meta = {'subset': False, 'shuffle': False}
         convert_annotation = True
         if not ignore_subset_settings(config):
@@ -834,19 +849,35 @@ def ignore_subset_settings(config):
     return False
 
 
-def _create_subset(annotation, config, no_recursion=False):
-    subsample_size = config.get('subsample_size')
-    if not ignore_subset_settings(config):
+def get_subsample_size(config):
+    size = config.get('subsample_size')
+    sub_evaluation = config.get('sub_evaluation', False)
+    if sub_evaluation:
+        subset_metrics = config.get('subset_metrics', [])
+        for item in subset_metrics:
+            subset_size = item.get('subset_size')
+            if size is None or subset_size == size:
+                # first subset_metrics or matching subsample_size
+                size = subset_size
+                break
+    return size
+
 
+def _create_subset(annotation, config, no_recursion=False):
+    if ignore_subset_settings(config):
+        if config.get('subsample_size') is not None:
+            warnings.warn("Subset selection parameters will be ignored")
+            config.pop('subsample_size', None)
+            config.pop('subsample_seed', None)
+            config.pop('shuffle', None)
+        if config.get('sub_evaluation') is not None:
+            warnings.warn("Sub evaluation will be ignored")
+            config.pop('sub_evaluation', None)
+    else:
+        subsample_size = get_subsample_size(config)
         if subsample_size is not None:
             subsample_seed = config.get('subsample_seed', 666)
             shuffle = config.get('shuffle', True)
             annotation = create_subset(annotation, subsample_size, subsample_seed, shuffle, no_recursion)
 
-    elif subsample_size is not None:
-        warnings.warn("Subset selection parameters will be ignored")
-        config.pop('subsample_size', None)
-        config.pop('subsample_seed', None)
-        config.pop('shuffle', None)
-
     return annotation
tools/accuracy_checker/openvino/tools/accuracy_checker/evaluators/model_evaluator.py

Lines changed: 20 additions & 5 deletions
@@ -106,7 +106,7 @@ def from_configs(cls, model_config, delayed_annotation_loading=False):
         postprocessor = PostprocessingExecutor(dataset_config.get('postprocessing'), dataset_name, dataset_metadata)
         metric_dispatcher = None
         if not delayed_annotation_loading:
-            metric_dispatcher = MetricsExecutor(dataset_config.get('metrics', []), dataset)
+            metric_dispatcher = MetricsExecutor(get_config_metrics(dataset_config), dataset)
             if metric_dispatcher.profile_metrics:
                 metric_dispatcher.set_processing_info(ModelEvaluator.get_processing_info(model_config))
@@ -159,7 +159,7 @@ def validate_config(cls, model_config, delayed_annotation_loading=False):
             )
             config_errors.extend(
                 MetricsExecutor.validate_config(
-                    dataset_config.get('metrics', []), fetch_only=True,
+                    get_config_metrics(dataset_config), fetch_only=True,
                     uri_prefix='{}.metrics'.format(current_dataset_uri))
             )
@@ -227,7 +227,7 @@ def send_processing_info(self, sender):
         adapter_type = None
         if adapter:
             adapter_type = adapter if isinstance(adapter, str) else adapter.get('type')
-        metrics = dataset_config.get('metrics', [])
+        metrics = get_config_metrics(dataset_config)
         metric_info = [metric['type'] for metric in metrics]
         details.update({
             'metrics': metric_info,
@@ -773,10 +773,10 @@ def release(self):
     def provide_metric_references(cls, conf, return_header=True):
         processing_info = cls.get_processing_info(conf)
         dataset_config = conf['datasets'][0]
-        metric_dispatcher = MetricsExecutor(dataset_config.get('metrics', []), postpone_metrics=True)
+        metric_dispatcher = MetricsExecutor(get_config_metrics(dataset_config), postpone_metrics=True)
         extracted_results, extracted_meta = [], []
         for result_presenter, metric_result in metric_dispatcher.get_metric_result_template(
-                dataset_config.get('metrics', []), False):
+                get_config_metrics(dataset_config), False):
             result, metadata = result_presenter.extract_result(metric_result, names_from_refs=True)
             if isinstance(result, list):
                 extracted_results.extend(result)
@@ -788,3 +788,18 @@ def provide_metric_references(cls, conf, return_header=True):
         if not return_header:
             return report
         return header, report
+
+
+def get_config_metrics(config):
+    metrics = None
+    sub_evaluation = config.get('sub_evaluation', False)
+    if sub_evaluation is not None:
+        size = config.get('subsample_size')
+        subset_metrics = config.get('subset_metrics', [])
+        for item in subset_metrics:
+            subset_size = item.get('subset_size')
+            if size is None or subset_size == size:
+                # first subset_metrics or matching subsample_size
+                metrics = item.get('metrics')
+                break
+    return config.get('metrics', []) if (metrics is None) else metrics
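And a matching sketch for `get_config_metrics`, with the same assumption about importability (here from `openvino.tools.accuracy_checker.evaluators.model_evaluator`); note that it falls back to the top-level `metrics` list when no subset entry applies:

```python
from openvino.tools.accuracy_checker.evaluators.model_evaluator import get_config_metrics

cfg = {
    'sub_evaluation': True,
    'metrics': [{'type': 'accuracy', 'top_k': 5, 'reference': 86.43}],
    'subset_metrics': [
        {'subset_size': '10%', 'metrics': [{'type': 'accuracy', 'top_k': 5, 'reference': 86.13}]},
        {'subset_size': '20%', 'metrics': [{'type': 'accuracy', 'top_k': 5, 'reference': 86.23}]},
    ],
}
print(get_config_metrics(cfg))                                # metrics of the '10%' entry (first one)
print(get_config_metrics({**cfg, 'subsample_size': '20%'}))   # metrics of the '20%' entry (matching size)
print(get_config_metrics({'metrics': cfg['metrics']}))        # no subset_metrics: top-level metrics returned
```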

tools/accuracy_checker/tests/test_dataset.py

Lines changed: 63 additions & 0 deletions
@@ -386,3 +386,66 @@ def test_create_data_provider_with_subset_file(self, mocker):
         assert len(dataset.data_provider) == 1
         assert dataset.identifiers == ['1']
         assert dataset.data_provider.full_size == 2
+
+    def test_sub_evaluation_annotation_conversion_subset_ratio_from_subset_metrics(self, mocker):
+        addition_options = {
+            'annotation_conversion': {'converter': 'wider', 'annotation_file': Path('file')},
+            'sub_evaluation': True,
+            'subset_metrics': [{'subset_size': '50%'}]
+        }
+        config = copy_dataset_config(self.dataset_config)
+        config.update(addition_options)
+        converted_annotation = make_representation(['0 0 0 5 5', '0 1 1 10 10'], True)
+        mocker.patch(
+            'openvino.tools.accuracy_checker.annotation_converters.WiderFormatConverter.convert',
+            return_value=ConverterReturn(converted_annotation, None, None)
+        )
+        subset_maker_mock = mocker.patch(
+            'openvino.tools.accuracy_checker.dataset.make_subset'
+        )
+        Dataset.load_annotation(config)
+        subset_maker_mock.assert_called_once_with(converted_annotation, 1, 666, True, False)
+
+    def test_sub_evaluation_annotation_conversion_subset_ratio_from_subset_metrics(self, mocker):
+        addition_options = {
+            'annotation_conversion': {'converter': 'wider', 'annotation_file': Path('file')},
+            'sub_evaluation': True,
+            'subset_metrics': [{'subset_size': '50%'}]
+        }
+        config = copy_dataset_config(self.dataset_config)
+        config.update(addition_options)
+        converted_annotation = make_representation(['0 0 0 5 5', '0 1 1 10 10'], True)
+        mocker.patch(
+            'openvino.tools.accuracy_checker.annotation_converters.WiderFormatConverter.convert',
+            return_value=ConverterReturn(converted_annotation, None, None)
+        )
+        subset_maker_mock = mocker.patch(
+            'openvino.tools.accuracy_checker.dataset.make_subset'
+        )
+        Dataset.load_annotation(config)
+        subset_maker_mock.assert_called_once_with(converted_annotation, 1, 666, True, False)
+
+    def test_sub_evaluation_annotation_convered_saved_before_subset(self, mocker):
+        addition_options = {
+            'annotation_conversion': {'converter': 'wider', 'annotation_file': Path('file')},
+            'annotation': Path('custom'),
+            'sub_evaluation': True,
+            'subset_metrics': [{'subset_size': '50%'}]
+        }
+        config = copy_dataset_config(self.dataset_config)
+        config.update(addition_options)
+        converted_annotation = make_representation(['0 0 0 5 5', '0 1 1 10 10'], True)
+        mocker.patch(
+            'openvino.tools.accuracy_checker.annotation_converters.WiderFormatConverter.convert',
+            return_value=ConverterReturn(converted_annotation, None, None)
+        )
+        annotation_saver_mock = mocker.patch(
+            'openvino.tools.accuracy_checker.dataset.save_annotation'
+        )
+        mocker.patch('pathlib.Path.exists', return_value=False)
+        subset_maker_mock = mocker.patch(
+            'openvino.tools.accuracy_checker.dataset.make_subset'
+        )
+        Dataset.load_annotation(config)
+        annotation_saver_mock.assert_called_once_with(converted_annotation, None, Path('custom'), None, config)
+        subset_maker_mock.assert_called_once_with(converted_annotation, 1, 666, True, False)
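If you want to run just the new tests locally, something along these lines should work from the repository root (path and pytest availability are assumptions, not part of the commit):

```python
# Hypothetical invocation: run only the sub-evaluation tests; adjust the path to your checkout.
import pytest

pytest.main(['tools/accuracy_checker/tests/test_dataset.py', '-k', 'sub_evaluation', '-v'])
```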
