Commit fb41315

Add sub evaluation with subset metrics (#3892)
1 parent 8c9bc0d commit fb41315

7 files changed, +176 -21 lines changed

tools/accuracy_checker/README.md

Lines changed: 38 additions & 1 deletion
@@ -161,6 +161,7 @@ Use `-h, --help` to get the full list of command-line options. Some arguments ar
 - `--shuffle` allows shuffle annotation during creation a subset if subsample_size argument is provided. Default is `True`.
 - `--intermediate_metrics_results` enables intermediate metrics results printing. Default is `False`
 - `--metrics_interval` number of iterations for updated metrics result printing if `--intermediate_metrics_results` flag enabled. Default is 1000.
+- `--sub_evaluation` enables evaluation of a dataset subset with predefined `subset_metrics`. Default is `False`. See [Sub evaluation with subset metrics](#Sub-evaluation-with-subset-metrics).
 
 You are also able to replace some command-line arguments with the environment variables for path prefixing. Supported list of variables includes:
 * `DEFINITIONS_FILE` - equivalent of `-d`, `-definitions`.
@@ -248,9 +249,13 @@ You can convert annotation in-place using:
 
 or use existing annotation file and dataset meta:
 - `annotation` - path to annotation file, you must **convert annotation to representation of dataset problem first**, you may choose one of the converters from *annotation-converters* if there is already converter for your dataset or write your own.
-- `dataset_meta`: path to metadata file (generated by converter).
+- `dataset_meta` - path to metadata file (generated by converter).
 More detailed information about annotation conversion you can find in [Annotation Conversion Guide](openvino/tools/accuracy_checker/annotation_converters/README.md).
 
+- `subset_metrics` - list of dataset subsets with unique size and metrics, computed if the `--sub_evaluation` flag is enabled. If `subsample_size` is defined, only the subset with a matching `subset_size` is evaluated; otherwise the first subset is validated by default. See [Sub evaluation with subset metrics](#Sub-evaluation-with-subset-metrics).
+- `subset_size` - size of the dataset subset to evaluate; its value is compared with `subsample_size` to select the desired subset for evaluation.
+- `metrics` - list of metrics specific to the defined subset size
+
 Example of dataset definition:
 
 ```yaml
@@ -303,6 +308,38 @@ metrics:
     threshold: 0.005
 ```
 
+#### Sub-evaluation with subset metrics
+
+You may optionally enable the `sub_evaluation` flag to quickly get results for a subset of a big dataset.
+The `subset_metrics` list needs to provide subsets with different `subset_size` and `metrics`.
+If `subset_metrics` consists of several entries, you may use the `subsample_size` value to select the desired `subset_size`; otherwise the first defined `subset_size` will be used.
+
+Note: Enabling the `sub_evaluation` flag has no effect when the accuracy config has no `subset_metrics` defined.
+
+Example:
+
+```yaml
+metrics:
+  - type: accuracy
+    top_k: 5
+    reference: 86.43
+subset_metrics:
+  - subset_size: "10%"
+    metrics:
+      - type: accuracy
+        top_k: 5
+        reference: 86.13
+  - subset_size: "20%"
+    metrics:
+      - type: accuracy
+        top_k: 5
+        reference: 86.23
+        top_k: 1
+        reference: 76.42
+```
+
+
+
 ### Testing New Models
 
 Typical workflow for testing a new model includes:
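For readers skimming the diff, here is a minimal Python sketch (not part of the commit) of the selection rule the README section above describes. The dictionary simply mirrors the YAML example, and `pick_subset_entry` is an illustrative name, not a function that exists in the tool:

```python
# Illustrative sketch of the documented rule, not the tool's implementation.
dataset_config = {
    "sub_evaluation": True,          # set via the --sub_evaluation command-line flag
    "subsample_size": "20%",         # optional; used to pick a specific subset entry
    "metrics": [{"type": "accuracy", "top_k": 5, "reference": 86.43}],
    "subset_metrics": [
        {"subset_size": "10%", "metrics": [{"type": "accuracy", "top_k": 5, "reference": 86.13}]},
        {"subset_size": "20%", "metrics": [{"type": "accuracy", "top_k": 5, "reference": 86.23}]},
    ],
}

def pick_subset_entry(config):
    """Return the subset_metrics entry selected under the documented rule, or None."""
    if not config.get("sub_evaluation"):
        return None                  # flag disabled: top-level metrics are used as before
    wanted = config.get("subsample_size")
    for entry in config.get("subset_metrics", []):
        if wanted is None or entry.get("subset_size") == wanted:
            return entry             # first entry, or the one whose subset_size matches
    return None                      # no match: evaluation falls back to top-level metrics

print(pick_subset_entry(dataset_config)["subset_size"])  # -> 20%
```

With `subsample_size` omitted, the `10%` entry would be picked instead.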

tools/accuracy_checker/openvino/tools/accuracy_checker/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -14,4 +14,4 @@
 limitations under the License.
 """
 
-__version__ = "0.9.5"
+__version__ = "0.9.6"

tools/accuracy_checker/openvino/tools/accuracy_checker/argparser.py

Lines changed: 7 additions & 0 deletions
@@ -129,6 +129,13 @@ def add_dataset_related_args(parser):
         help='file name for saving or reading identifiers subset',
         required=False
     )
+    dataset_related_args.add_argument(
+        '--sub_evaluation',
+        help='attempt to use subset size and metrics for sub evaluation',
+        type=cast_to_bool,
+        default=False,
+        required=False
+    )
 
 
 def add_profiling_related_args(parser):
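For context, a self-contained sketch of how such a boolean-cast argument behaves; `cast_to_bool` below is a simplified stand-in for the project's helper (whose actual implementation is not shown in this diff):

```python
import argparse

def cast_to_bool(value):
    # Simplified stand-in: accept common textual spellings of True/False.
    if isinstance(value, bool):
        return value
    return value.strip().lower() in ('1', 't', 'true', 'yes')

parser = argparse.ArgumentParser()
parser.add_argument(
    '--sub_evaluation',
    help='attempt to use subset size and metrics for sub evaluation',
    type=cast_to_bool,
    default=False,
    required=False
)

print(parser.parse_args([]).sub_evaluation)                             # False (default)
print(parser.parse_args(['--sub_evaluation', 'True']).sub_evaluation)   # True
print(parser.parse_args(['--sub_evaluation', '0']).sub_evaluation)      # False
```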

tools/accuracy_checker/openvino/tools/accuracy_checker/config/config_reader.py

Lines changed: 2 additions & 0 deletions
@@ -881,6 +881,8 @@ def _add_subset_specific_arg(dataset_entry, arguments):
         dataset_entry['subset_file'] = arguments.subset_file
     if 'store_subset' in arguments and arguments.store_subset:
         dataset_entry['store_subset'] = arguments.store_subset
+    if 'sub_evaluation' in arguments and arguments.sub_evaluation:
+        dataset_entry['sub_evaluation'] = arguments.sub_evaluation
 
 
 def prepare_commandline_conversion_mapping(commandline_conversion, args):
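A small sketch of what this change does at config-assembly time. The function below mirrors the new branch rather than importing the private helper, and assumes `arguments` is an `argparse.Namespace` (which supports `in` checks):

```python
from argparse import Namespace

def add_sub_evaluation_arg(dataset_entry, arguments):
    # Mirrors the branch added to _add_subset_specific_arg above.
    if 'sub_evaluation' in arguments and arguments.sub_evaluation:
        dataset_entry['sub_evaluation'] = arguments.sub_evaluation

entry = {}
add_sub_evaluation_arg(entry, Namespace(sub_evaluation=True))
print(entry)   # {'sub_evaluation': True}

entry = {}
add_sub_evaluation_arg(entry, Namespace(sub_evaluation=False))
print(entry)   # {}  (the key is only written when the flag is truthy)
```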

tools/accuracy_checker/openvino/tools/accuracy_checker/dataset.py

Lines changed: 45 additions & 14 deletions
@@ -111,6 +111,13 @@ def parameters(cls):
                 optional=True, default=False,
                 description='save subset ids to file specified in subset_file parameter'
             ),
+            'subset_metrics': ListField(
+                allow_empty=False, optional=True, description='list of metrics for subset evaluation'
+            ),
+            'sub_evaluation': BoolField(
+                optional=True, default=False,
+                description='if subset_metrics defined, use its subset size and metrics for evaluation'
+            ),
             'batch': NumberField(value_type=int, min_value=1, optional=True, description='batch size for data read'),
             '_profile': BoolField(optional=True, default=False, description='allow metric profiling'),
             '_report_type': StringField(optional=True, choices=['json', 'csv'], description='type profiling report'),
@@ -171,15 +178,23 @@ def _save_annotation():
 
         if not annotation:
             raise ConfigError('path to converted annotation or data for conversion should be specified')
+
+        sub_evaluation = config.get('sub_evaluation', False) and not ignore_subset_settings(config)
+
+        if sub_evaluation:
+            if use_converted_annotation and contains_all(config, ['annotation', 'annotation_conversion']):
+                _save_annotation()
+
         no_recursion = (meta or {}).get('no_recursion', False)
         annotation = _create_subset(annotation, config, no_recursion)
         dataset_analysis = config.get('analyze_dataset', False)
 
         if dataset_analysis:
             meta = _run_dataset_analysis(meta)
 
-        if use_converted_annotation and contains_all(config, ['annotation', 'annotation_conversion']):
-            _save_annotation()
+        if not sub_evaluation:
+            if use_converted_annotation and contains_all(config, ['annotation', 'annotation_conversion']):
+                _save_annotation()
 
         return annotation, meta
@@ -198,7 +213,7 @@ def send_annotation_info(self, config):
             'dataset_size': self.size
         }
         convert_annotation = True
-        subsample_size = config.get('subsample_size')
+        subsample_size = get_subsample_size(config)
         subsample_meta = {'subset': False, 'shuffle': False}
         if not ignore_subset_settings(config):
@@ -767,7 +782,7 @@ def provide_data_info(self, annotations, progress_reporter=None):
         return annotations
 
     def set_annotation(self, annotation, meta):
-        subsample_size = self.dataset_config.get('subsample_size')
+        subsample_size = get_subsample_size(self.dataset_config)
         if subsample_size is not None:
             subsample_seed = self.dataset_config.get('subsample_seed', 666)
@@ -791,7 +806,7 @@ def send_annotation_info(self, config):
             'annotation_saving': False,
             'dataset_size': self.size
         }
-        subsample_size = config.get('subsample_size')
+        subsample_size = get_subsample_size(config)
         subsample_meta = {'subset': False, 'shuffle': False}
         convert_annotation = True
         if not ignore_subset_settings(config):
@@ -834,19 +849,35 @@ def ignore_subset_settings(config):
     return False
 
 
-def _create_subset(annotation, config, no_recursion=False):
-    subsample_size = config.get('subsample_size')
-    if not ignore_subset_settings(config):
+def get_subsample_size(config):
+    size = config.get('subsample_size')
+    sub_evaluation = config.get('sub_evaluation', False)
+    if sub_evaluation:
+        subset_metrics = config.get('subset_metrics', [])
+        for item in subset_metrics:
+            subset_size = item.get('subset_size')
+            if size is None or subset_size == size:
+                # first subset_metrics or matching subsample_size
+                size = subset_size
+                break
+    return size
+
 
+def _create_subset(annotation, config, no_recursion=False):
+    if ignore_subset_settings(config):
+        if config.get('subsample_size') is not None:
+            warnings.warn("Subset selection parameters will be ignored")
+            config.pop('subsample_size', None)
+            config.pop('subsample_seed', None)
+            config.pop('shuffle', None)
+        if config.get('sub_evaluation') is not None:
+            warnings.warn("Sub evaluation will be ignored")
+            config.pop('sub_evaluation', None)
+    else:
+        subsample_size = get_subsample_size(config)
         if subsample_size is not None:
             subsample_seed = config.get('subsample_seed', 666)
             shuffle = config.get('shuffle', True)
             annotation = create_subset(annotation, subsample_size, subsample_seed, shuffle, no_recursion)
 
-    elif subsample_size is not None:
-        warnings.warn("Subset selection parameters will be ignored")
-        config.pop('subsample_size', None)
-        config.pop('subsample_seed', None)
-        config.pop('shuffle', None)
-
     return annotation
tools/accuracy_checker/openvino/tools/accuracy_checker/evaluators/model_evaluator.py

Lines changed: 20 additions & 5 deletions
@@ -106,7 +106,7 @@ def from_configs(cls, model_config, delayed_annotation_loading=False):
         postprocessor = PostprocessingExecutor(dataset_config.get('postprocessing'), dataset_name, dataset_metadata)
         metric_dispatcher = None
         if not delayed_annotation_loading:
-            metric_dispatcher = MetricsExecutor(dataset_config.get('metrics', []), dataset)
+            metric_dispatcher = MetricsExecutor(get_config_metrics(dataset_config), dataset)
             if metric_dispatcher.profile_metrics:
                 metric_dispatcher.set_processing_info(ModelEvaluator.get_processing_info(model_config))
@@ -159,7 +159,7 @@ def validate_config(cls, model_config, delayed_annotation_loading=False):
             )
             config_errors.extend(
                 MetricsExecutor.validate_config(
-                    dataset_config.get('metrics', []), fetch_only=True,
+                    get_config_metrics(dataset_config), fetch_only=True,
                     uri_prefix='{}.metrics'.format(current_dataset_uri))
             )
@@ -227,7 +227,7 @@ def send_processing_info(self, sender):
         adapter_type = None
         if adapter:
             adapter_type = adapter if isinstance(adapter, str) else adapter.get('type')
-        metrics = dataset_config.get('metrics', [])
+        metrics = get_config_metrics(dataset_config)
         metric_info = [metric['type'] for metric in metrics]
         details.update({
             'metrics': metric_info,
@@ -773,10 +773,10 @@ def release(self):
     def provide_metric_references(cls, conf, return_header=True):
         processing_info = cls.get_processing_info(conf)
         dataset_config = conf['datasets'][0]
-        metric_dispatcher = MetricsExecutor(dataset_config.get('metrics', []), postpone_metrics=True)
+        metric_dispatcher = MetricsExecutor(get_config_metrics(dataset_config), postpone_metrics=True)
         extracted_results, extracted_meta = [], []
         for result_presenter, metric_result in metric_dispatcher.get_metric_result_template(
-                dataset_config.get('metrics', []), False):
+                get_config_metrics(dataset_config), False):
             result, metadata = result_presenter.extract_result(metric_result, names_from_refs=True)
             if isinstance(result, list):
                 extracted_results.extend(result)
@@ -788,3 +788,18 @@ def provide_metric_references(cls, conf, return_header=True):
         if not return_header:
             return report
         return header, report
+
+
+def get_config_metrics(config):
+    metrics = None
+    sub_evaluation = config.get('sub_evaluation', False)
+    if sub_evaluation is not None:
+        size = config.get('subsample_size')
+        subset_metrics = config.get('subset_metrics', [])
+        for item in subset_metrics:
+            subset_size = item.get('subset_size')
+            if size is None or subset_size == size:
+                # first subset_metrics or matching subsample_size
+                metrics = item.get('metrics')
+                break
+    return config.get('metrics', []) if (metrics is None) else metrics
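And a matching sketch for `get_config_metrics`, with the same assumption about importability (here from `openvino.tools.accuracy_checker.evaluators.model_evaluator`); note that it falls back to the top-level `metrics` list when no subset entry applies:

```python
from openvino.tools.accuracy_checker.evaluators.model_evaluator import get_config_metrics

cfg = {
    'sub_evaluation': True,
    'metrics': [{'type': 'accuracy', 'top_k': 5, 'reference': 86.43}],
    'subset_metrics': [
        {'subset_size': '10%', 'metrics': [{'type': 'accuracy', 'top_k': 5, 'reference': 86.13}]},
        {'subset_size': '20%', 'metrics': [{'type': 'accuracy', 'top_k': 5, 'reference': 86.23}]},
    ],
}
print(get_config_metrics(cfg))                                # metrics of the '10%' entry (first one)
print(get_config_metrics({**cfg, 'subsample_size': '20%'}))   # metrics of the '20%' entry (matching size)
print(get_config_metrics({'metrics': cfg['metrics']}))        # no subset_metrics: top-level metrics returned
```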

tools/accuracy_checker/tests/test_dataset.py

Lines changed: 63 additions & 0 deletions
@@ -386,3 +386,66 @@ def test_create_data_provider_with_subset_file(self, mocker):
         assert len(dataset.data_provider) == 1
         assert dataset.identifiers == ['1']
         assert dataset.data_provider.full_size == 2
+
+    def test_sub_evaluation_annotation_conversion_subset_ratio_from_subset_metrics(self, mocker):
+        addition_options = {
+            'annotation_conversion': {'converter': 'wider', 'annotation_file': Path('file')},
+            'sub_evaluation': True,
+            'subset_metrics': [{'subset_size': '50%'}]
+        }
+        config = copy_dataset_config(self.dataset_config)
+        config.update(addition_options)
+        converted_annotation = make_representation(['0 0 0 5 5', '0 1 1 10 10'], True)
+        mocker.patch(
+            'openvino.tools.accuracy_checker.annotation_converters.WiderFormatConverter.convert',
+            return_value=ConverterReturn(converted_annotation, None, None)
+        )
+        subset_maker_mock = mocker.patch(
+            'openvino.tools.accuracy_checker.dataset.make_subset'
+        )
+        Dataset.load_annotation(config)
+        subset_maker_mock.assert_called_once_with(converted_annotation, 1, 666, True, False)
+
+    def test_sub_evaluation_annotation_conversion_subset_ratio_from_subset_metrics(self, mocker):
+        addition_options = {
+            'annotation_conversion': {'converter': 'wider', 'annotation_file': Path('file')},
+            'sub_evaluation': True,
+            'subset_metrics': [{'subset_size': '50%'}]
+        }
+        config = copy_dataset_config(self.dataset_config)
+        config.update(addition_options)
+        converted_annotation = make_representation(['0 0 0 5 5', '0 1 1 10 10'], True)
+        mocker.patch(
+            'openvino.tools.accuracy_checker.annotation_converters.WiderFormatConverter.convert',
+            return_value=ConverterReturn(converted_annotation, None, None)
+        )
+        subset_maker_mock = mocker.patch(
+            'openvino.tools.accuracy_checker.dataset.make_subset'
+        )
+        Dataset.load_annotation(config)
+        subset_maker_mock.assert_called_once_with(converted_annotation, 1, 666, True, False)
+
+    def test_sub_evaluation_annotation_convered_saved_before_subset(self, mocker):
+        addition_options = {
+            'annotation_conversion': {'converter': 'wider', 'annotation_file': Path('file')},
+            'annotation': Path('custom'),
+            'sub_evaluation': True,
+            'subset_metrics': [{'subset_size': '50%'}]
+        }
+        config = copy_dataset_config(self.dataset_config)
+        config.update(addition_options)
+        converted_annotation = make_representation(['0 0 0 5 5', '0 1 1 10 10'], True)
+        mocker.patch(
+            'openvino.tools.accuracy_checker.annotation_converters.WiderFormatConverter.convert',
+            return_value=ConverterReturn(converted_annotation, None, None)
+        )
+        annotation_saver_mock = mocker.patch(
+            'openvino.tools.accuracy_checker.dataset.save_annotation'
+        )
+        mocker.patch('pathlib.Path.exists', return_value=False)
+        subset_maker_mock = mocker.patch(
+            'openvino.tools.accuracy_checker.dataset.make_subset'
+        )
+        Dataset.load_annotation(config)
+        annotation_saver_mock.assert_called_once_with(converted_annotation, None, Path('custom'), None, config)
+        subset_maker_mock.assert_called_once_with(converted_annotation, 1, 666, True, False)
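If you want to run just the new tests locally, something along these lines should work from the repository root (path and pytest availability are assumptions, not part of the commit):

```python
# Hypothetical invocation: run only the sub-evaluation tests; adjust the path to your checkout.
import pytest

pytest.main(['tools/accuracy_checker/tests/test_dataset.py', '-k', 'sub_evaluation', '-v'])
```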
