Skip to content

Commit 230596b

Browse files
authored
[Enhancement] Allow test.py to save evaluation results (#108)
* Add log file * Delete debug code * Rename logger * resolve comments
1 parent f2d0b15 commit 230596b

File tree

11 files changed

+88
-63
lines changed

11 files changed

+88
-63
lines changed

docs/en/tutorials/how_to_evaluate_a_model.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ ${MODEL_CFG} \
2222
--device ${DEVICE} \
2323
[--cfg-options ${CFG_OPTIONS}] \
2424
[--metric-options ${METRIC_OPTIONS}]
25+
[--log2file work_dirs/output.txt]
2526
```
2627

2728
## Description of all arguments
@@ -39,6 +40,7 @@ ${MODEL_CFG} \
3940
* `--cfg-options`: Extra or overridden settings that will be merged into the current deploy config.
4041
* `--metric-options`: Custom options for evaluation. The key-value pair in xxx=yyy
4142
format will be kwargs for dataset.evaluate() function.
43+
* `--log2file`: log evaluation results (and speed) to file.
4244

4345
\* Other arguments in `tools/test.py` are used for speed test. They have no concern with evaluation.
4446

docs/en/tutorials/how_to_measure_performance_of_models.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@ ${MODEL_CFG} \
2424
* `deploy_cfg`: The config for deployment.
2525
* `model_cfg`: The config of the model in OpenMMLab codebases.
2626
* `--model`: The backend model files. For example, if we convert a model to ncnn, we need to pass a ".param" file and a ".bin" file. If we convert a model to TensorRT, we need to pass the model file with ".engine" suffix.
27+
* `--log2file`: log evaluation results and speed to file.
2728
* `--speed-test`: Whether to activate speed test.
2829
* `--warmup`: warmup before counting inference elapse, require setting speed-test first.
2930
* `--log-interval`: The interval between each log, require setting speed-test first.
30-
* `--log2file`: Log speed test result in file format, need speed-test first.
3131

3232
\* Other arguments in `tools/test.py` are used for performance test. They have no concern with speed test.
3333

mmdeploy/codebase/base/task.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,7 @@ def evaluate_outputs(model_cfg,
230230
out: Optional[str] = None,
231231
metric_options: Optional[dict] = None,
232232
format_only: bool = False,
233+
log_file: Optional[str] = None,
233234
**kwargs):
234235
"""Perform post-processing to predictions of model.
235236
@@ -244,13 +245,16 @@ def evaluate_outputs(model_cfg,
244245
for single label dataset, and "mAP", "CP", "CR", "CF1",
245246
"OP", "OR", "OF1" for multi-label dataset in mmcls.
246247
Defaults is `None`.
247-
out (str): Output result file in pickle format, defaults to `None`.
248+
out (str): Output inference results in pickle format, defaults to
249+
`None`.
248250
metric_options (dict): Custom options for evaluation, will be
249251
kwargs for dataset.evaluate() function. Defaults to `None`.
250252
format_only (bool): Format the output results without perform
251253
evaluation. It is useful when you want to format the result
252254
to a specific format and submit it to the test server. Defaults
253255
to `False`.
256+
log_file (str | None): The file to write the evaluation results.
257+
Defaults to `None` and the results will only print on stdout.
254258
"""
255259
pass
256260

mmdeploy/codebase/mmcls/deploy/classification.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Copyright (c) OpenMMLab. All rights reserved.
2+
import logging
23
from typing import Any, Dict, Optional, Sequence, Tuple, Union
34

45
import mmcv
@@ -208,7 +209,8 @@ def evaluate_outputs(model_cfg: mmcv.Config,
208209
metrics: Optional[str] = None,
209210
out: Optional[str] = None,
210211
metric_options: Optional[dict] = None,
211-
format_only: bool = False) -> None:
212+
format_only: bool = False,
213+
log_file: Optional[str] = None) -> None:
212214
"""Perform post-processing to predictions of model.
213215
214216
Args:
@@ -224,13 +226,17 @@ def evaluate_outputs(model_cfg: mmcv.Config,
224226
evaluation. It is useful when you want to format the result
225227
to a specific format and submit it to the test server.
226228
Default: False.
229+
log_file (str | None): The file to write the evaluation results.
230+
Defaults to `None` and the results will only print on stdout.
227231
"""
228232
import warnings
233+
from mmcv.utils import get_logger
234+
logger = get_logger('test', log_file=log_file, log_level=logging.INFO)
229235

230236
if metrics:
231237
results = dataset.evaluate(outputs, metrics, metric_options)
232238
for k, v in results.items():
233-
print(f'\n{k} : {v:.2f}')
239+
logger.info(f'{k} : {v:.2f}')
234240
else:
235241
warnings.warn('Evaluation metrics are not specified.')
236242
scores = np.vstack(outputs)
@@ -243,13 +249,13 @@ def evaluate_outputs(model_cfg: mmcv.Config,
243249
'pred_class': pred_class
244250
}
245251
if not out:
246-
print('\nthe predicted result for the first element is '
247-
f'pred_score = {pred_score[0]:.2f}, '
248-
f'pred_label = {pred_label[0]} '
249-
f'and pred_class = {pred_class[0]}. '
250-
'Specify --out to save all results to files.')
252+
logger.info('the predicted result for the first element is '
253+
f'pred_score = {pred_score[0]:.2f}, '
254+
f'pred_label = {pred_label[0]} '
255+
f'and pred_class = {pred_class[0]}. '
256+
'Specify --out to save all results to files.')
251257
if out:
252-
print(f'\nwriting results to {out}')
258+
logger.debug(f'writing results to {out}')
253259
mmcv.dump(results, out)
254260

255261
def get_preprocess(self) -> Dict:

mmdeploy/codebase/mmdet/deploy/object_detection.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from mmcv.parallel import DataContainer
88
from torch.utils.data import Dataset
99

10-
from mmdeploy.utils import Task, get_root_logger
10+
from mmdeploy.utils import Task
1111
from mmdeploy.utils.config_utils import get_input_shape, is_dynamic_shape
1212
from ...base import BaseTask
1313
from .mmdetection import MMDET_TASK
@@ -235,7 +235,8 @@ def evaluate_outputs(model_cfg: mmcv.Config,
235235
metrics: Optional[str] = None,
236236
out: Optional[str] = None,
237237
metric_options: Optional[dict] = None,
238-
format_only: bool = False):
238+
format_only: bool = False,
239+
log_file: Optional[str] = None):
239240
"""Perform post-processing to predictions of model.
240241
241242
Args:
@@ -252,10 +253,14 @@ def evaluate_outputs(model_cfg: mmcv.Config,
252253
evaluation. It is useful when you want to format the result
253254
to a specific format and submit it to the test server. Defaults
254255
to `False`.
256+
log_file (str | None): The file to write the evaluation results.
257+
Defaults to `None` and the results will only print on stdout.
255258
"""
259+
from mmcv.utils import get_logger
260+
logger = get_logger('test', log_file=log_file)
261+
256262
if out:
257-
logger = get_root_logger()
258-
logger.info(f'\nwriting results to {out}')
263+
logger.debug(f'writing results to {out}')
259264
mmcv.dump(outputs, out)
260265
kwargs = {} if metric_options is None else metric_options
261266
if format_only:
@@ -269,7 +274,7 @@ def evaluate_outputs(model_cfg: mmcv.Config,
269274
]:
270275
eval_kwargs.pop(key, None)
271276
eval_kwargs.update(dict(metric=metrics, **kwargs))
272-
print(dataset.evaluate(outputs, **eval_kwargs))
277+
logger.info(dataset.evaluate(outputs, **eval_kwargs))
273278

274279
def get_preprocess(self) -> Dict:
275280
"""Get the preprocess information for SDK.

mmdeploy/codebase/mmedit/deploy/super_resolution.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
from mmdeploy.codebase.base import BaseTask
1212
from mmdeploy.codebase.mmedit.deploy.mmediting import MMEDIT_TASK
13-
from mmdeploy.utils import Task, get_input_shape, get_root_logger, load_config
13+
from mmdeploy.utils import Task, get_input_shape, load_config
1414

1515

1616
def process_model_config(model_cfg: mmcv.Config,
@@ -249,6 +249,7 @@ def evaluate_outputs(model_cfg,
249249
out: Optional[str] = None,
250250
metric_options: Optional[dict] = None,
251251
format_only: bool = False,
252+
log_file: Optional[str] = None,
252253
**kwargs) -> None:
253254
"""Evaluation function implemented in mmedit.
254255
@@ -265,17 +266,20 @@ def evaluate_outputs(model_cfg,
265266
evaluation. It is useful when you want to format the result
266267
to a specific format and submit it to the test server. Defaults
267268
to `False`.
269+
log_file (str | None): The file to write the evaluation results.
270+
Defaults to `None` and the results will only print on stdout.
268271
"""
272+
from mmcv.utils import get_logger
273+
logger = get_logger('test', log_file=log_file)
274+
269275
if out:
270-
logger = get_root_logger()
271-
logger.info(f'\nwriting results to {out}')
276+
logger.debug(f'writing results to {out}')
272277
mmcv.dump(outputs, out)
273278
# The Dataset doesn't need metrics
274-
print('\n')
275279
# print metrics
276280
stats = dataset.evaluate(outputs)
277281
for stat in stats:
278-
print('Eval-{}: {}'.format(stat, stats[stat]))
282+
logger.info('Eval-{}: {}'.format(stat, stats[stat]))
279283

280284
def get_preprocess(self) -> Dict:
281285
"""Get the preprocess information for SDK.

mmdeploy/codebase/mmocr/deploy/text_detection.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from torch.utils.data import Dataset
1111

1212
from mmdeploy.codebase.base import BaseTask
13-
from mmdeploy.utils import Task, get_input_shape, get_root_logger
13+
from mmdeploy.utils import Task, get_input_shape
1414
from .mmocr import MMOCR_TASK
1515

1616

@@ -246,15 +246,16 @@ def evaluate_outputs(model_cfg,
246246
metrics: Optional[str] = None,
247247
out: Optional[str] = None,
248248
metric_options: Optional[dict] = None,
249-
format_only: bool = False):
249+
format_only: bool = False,
250+
log_file: Optional[str] = None):
250251
"""Perform post-processing to predictions of model.
251252
252253
Args:
253254
outputs (Sequence): A list of predictions of model inference.
254255
dataset (Dataset): Input dataset to run test.
255256
model_cfg (mmcv.Config): The model config.
256257
metrics (str): Evaluation metrics, which depends on
257-
the codebase and the dataset, e.g., e.g., "acc" for text
258+
the codebase and the dataset, e.g., "acc" for text
258259
recognition, and "hmean-iou" for text detection.
259260
out (str): Output result file in pickle format, defaults to `None`.
260261
metric_options (dict): Custom options for evaluation, will be
@@ -263,10 +264,14 @@ def evaluate_outputs(model_cfg,
263264
evaluation. It is useful when you want to format the result
264265
to a specific format and submit it to the test server. Defaults
265266
to `False`.
267+
log_file (str | None): The file to write the evaluation results.
268+
Defaults to `None` and the results will only print on stdout.
266269
"""
270+
from mmcv.utils import get_logger
271+
logger = get_logger('test', log_file=log_file)
272+
267273
if out:
268-
logger = get_root_logger()
269-
logger.info(f'\nwriting results to {out}')
274+
logger.debug(f'writing results to {out}')
270275
mmcv.dump(outputs, out)
271276
kwargs = {} if metric_options is None else metric_options
272277
if format_only:
@@ -280,7 +285,7 @@ def evaluate_outputs(model_cfg,
280285
]:
281286
eval_kwargs.pop(key, None)
282287
eval_kwargs.update(dict(metric=metrics, **kwargs))
283-
print(dataset.evaluate(outputs, **eval_kwargs))
288+
logger.info(dataset.evaluate(outputs, **eval_kwargs))
284289

285290
def get_preprocess(self) -> Dict:
286291
"""Get the preprocess information for SDK.

mmdeploy/codebase/mmocr/deploy/text_recognition.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from torch.utils.data import Dataset
1111

1212
from mmdeploy.codebase.base import BaseTask
13-
from mmdeploy.utils import Task, get_input_shape, get_root_logger
13+
from mmdeploy.utils import Task, get_input_shape
1414
from .mmocr import MMOCR_TASK
1515

1616

@@ -259,15 +259,16 @@ def evaluate_outputs(model_cfg: mmcv.Config,
259259
metrics: Optional[str] = None,
260260
out: Optional[str] = None,
261261
metric_options: Optional[dict] = None,
262-
format_only: bool = False):
262+
format_only: bool = False,
263+
log_file: Optional[str] = None):
263264
"""Perform post-processing to predictions of model.
264265
265266
Args:
266267
model_cfg (mmcv.Config): The model config.
267268
outputs (list): A list of predictions of model inference.
268269
dataset (Dataset): Input dataset to run test.
269270
metrics (str): Evaluation metrics, which depends on
270-
the codebase and the dataset, e.g., e.g., "acc" for text
271+
the codebase and the dataset, e.g., "acc" for text
271272
recognition, and "hmean-iou" for text detection.
272273
out (str): Output result file in pickle format, defaults to `None`.
273274
metric_options (dict): Custom options for evaluation, will be
@@ -276,10 +277,14 @@ def evaluate_outputs(model_cfg: mmcv.Config,
276277
evaluation. It is useful when you want to format the result
277278
to a specific format and submit it to the test server. Defaults
278279
to `False`.
280+
log_file (str | None): The file to write the evaluation results.
281+
Defaults to `None` and the results will only print on stdout.
279282
"""
283+
from mmcv.utils import get_logger
284+
logger = get_logger('test', log_file=log_file)
285+
280286
if out:
281-
logger = get_root_logger()
282-
logger.info(f'\nwriting results to {out}')
287+
logger.debug(f'writing results to {out}')
283288
mmcv.dump(outputs, out)
284289
kwargs = {} if metric_options is None else metric_options
285290
if format_only:
@@ -293,7 +298,7 @@ def evaluate_outputs(model_cfg: mmcv.Config,
293298
]:
294299
eval_kwargs.pop(key, None)
295300
eval_kwargs.update(dict(metric=metrics, **kwargs))
296-
print(dataset.evaluate(outputs, **eval_kwargs))
301+
logger.info(dataset.evaluate(outputs, **eval_kwargs))
297302

298303
def get_preprocess(self) -> Dict:
299304
"""Get the preprocess information for SDK.

mmdeploy/codebase/mmseg/deploy/segmentation.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from torch.utils.data import Dataset
88

99
from mmdeploy.codebase.base import BaseTask
10-
from mmdeploy.utils import Task, get_input_shape, get_root_logger
10+
from mmdeploy.utils import Task, get_input_shape
1111
from .mmsegmentation import MMSEG_TASK
1212

1313

@@ -205,7 +205,8 @@ def evaluate_outputs(model_cfg,
205205
metrics: Optional[str] = None,
206206
out: Optional[str] = None,
207207
metric_options: Optional[dict] = None,
208-
format_only: bool = False):
208+
format_only: bool = False,
209+
log_file: Optional[str] = None):
209210
"""Perform post-processing to predictions of model.
210211
211212
Args:
@@ -222,16 +223,20 @@ def evaluate_outputs(model_cfg,
222223
evaluation. It is useful when you want to format the result
223224
to a specific format and submit it to the test server. Defaults
224225
to `False`.
226+
log_file (str | None): The file to write the evaluation results.
227+
Defaults to `None` and the results will only print on stdout.
225228
"""
229+
from mmcv.utils import get_logger
230+
logger = get_logger('test', log_file=log_file)
231+
226232
if out:
227-
logger = get_root_logger()
228-
logger.info(f'\nwriting results to {out}')
233+
logger.debug(f'writing results to {out}')
229234
mmcv.dump(outputs, out)
230235
kwargs = {} if metric_options is None else metric_options
231236
if format_only:
232237
dataset.format_results(outputs, **kwargs)
233238
if metrics:
234-
dataset.evaluate(outputs, metrics, **kwargs)
239+
dataset.evaluate(outputs, metrics, logger=logger, **kwargs)
235240

236241
def get_preprocess(self) -> Dict:
237242
"""Get the preprocess information for SDK.

0 commit comments

Comments (0)