Skip to content

Commit 230596b

Browse files
authored
[Enhancement] Allow test.py to save evaluation results (#108)
* Add log file * Delete debug code * Rename logger * resolve comments
1 parent f2d0b15 commit 230596b

File tree

11 files changed

+88
-63
lines changed

11 files changed

+88
-63
lines changed

docs/en/tutorials/how_to_evaluate_a_model.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ ${MODEL_CFG} \
2222
--device ${DEVICE} \
2323
[--cfg-options ${CFG_OPTIONS}] \
2424
[--metric-options ${METRIC_OPTIONS}]
25+
[--log2file work_dirs/output.txt]
2526
```
2627

2728
## Description of all arguments
@@ -39,6 +40,7 @@ ${MODEL_CFG} \
3940
* `--cfg-options`: Extra or overridden settings that will be merged into the current deploy config.
4041
* `--metric-options`: Custom options for evaluation. The key-value pair in xxx=yyy
4142
format will be kwargs for dataset.evaluate() function.
43+
* `--log2file`: log evaluation results (and speed) to file.
4244

4345
\* Other arguments in `tools/test.py` are used for speed test. They have no concern with evaluation.
4446

docs/en/tutorials/how_to_measure_performance_of_models.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@ ${MODEL_CFG} \
2424
* `deploy_cfg`: The config for deployment.
2525
* `model_cfg`: The config of the model in OpenMMLab codebases.
2626
* `--model`: The backend model files. For example, if we convert a model to ncnn, we need to pass a ".param" file and a ".bin" file. If we convert a model to TensorRT, we need to pass the model file with ".engine" suffix.
27+
* `--log2file`: log evaluation results and speed to file.
2728
* `--speed-test`: Whether to activate speed test.
2829
* `--warmup`: warmup before counting inference elapse, require setting speed-test first.
2930
* `--log-interval`: The interval between each log, require setting speed-test first.
30-
* `--log2file`: Log speed test result in file format, need speed-test first.
3131

3232
\* Other arguments in `tools/test.py` are used for performance test. They have no concern with speed test.
3333

mmdeploy/codebase/base/task.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,7 @@ def evaluate_outputs(model_cfg,
230230
out: Optional[str] = None,
231231
metric_options: Optional[dict] = None,
232232
format_only: bool = False,
233+
log_file: Optional[str] = None,
233234
**kwargs):
234235
"""Perform post-processing to predictions of model.
235236
@@ -244,13 +245,16 @@ def evaluate_outputs(model_cfg,
244245
for single label dataset, and "mAP", "CP", "CR", "CF1",
245246
"OP", "OR", "OF1" for multi-label dataset in mmcls.
246247
Defaults is `None`.
247-
out (str): Output result file in pickle format, defaults to `None`.
248+
out (str): Output inference results in pickle format, defaults to
249+
`None`.
248250
metric_options (dict): Custom options for evaluation, will be
249251
kwargs for dataset.evaluate() function. Defaults to `None`.
250252
format_only (bool): Format the output results without perform
251253
evaluation. It is useful when you want to format the result
252254
to a specific format and submit it to the test server. Defaults
253255
to `False`.
256+
log_file (str | None): The file to write the evaluation results.
257+
Defaults to `None` and the results will only print on stdout.
254258
"""
255259
pass
256260

mmdeploy/codebase/mmcls/deploy/classification.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Copyright (c) OpenMMLab. All rights reserved.
2+
import logging
23
from typing import Any, Dict, Optional, Sequence, Tuple, Union
34

45
import mmcv
@@ -208,7 +209,8 @@ def evaluate_outputs(model_cfg: mmcv.Config,
208209
metrics: Optional[str] = None,
209210
out: Optional[str] = None,
210211
metric_options: Optional[dict] = None,
211-
format_only: bool = False) -> None:
212+
format_only: bool = False,
213+
log_file: Optional[str] = None) -> None:
212214
"""Perform post-processing to predictions of model.
213215
214216
Args:
@@ -224,13 +226,17 @@ def evaluate_outputs(model_cfg: mmcv.Config,
224226
evaluation. It is useful when you want to format the result
225227
to a specific format and submit it to the test server.
226228
Default: False.
229+
log_file (str | None): The file to write the evaluation results.
230+
Defaults to `None` and the results will only print on stdout.
227231
"""
228232
import warnings
233+
from mmcv.utils import get_logger
234+
logger = get_logger('test', log_file=log_file, log_level=logging.INFO)
229235

230236
if metrics:
231237
results = dataset.evaluate(outputs, metrics, metric_options)
232238
for k, v in results.items():
233-
print(f'\n{k} : {v:.2f}')
239+
logger.info(f'{k} : {v:.2f}')
234240
else:
235241
warnings.warn('Evaluation metrics are not specified.')
236242
scores = np.vstack(outputs)
@@ -243,13 +249,13 @@ def evaluate_outputs(model_cfg: mmcv.Config,
243249
'pred_class': pred_class
244250
}
245251
if not out:
246-
print('\nthe predicted result for the first element is '
247-
f'pred_score = {pred_score[0]:.2f}, '
248-
f'pred_label = {pred_label[0]} '
249-
f'and pred_class = {pred_class[0]}. '
250-
'Specify --out to save all results to files.')
252+
logger.info('the predicted result for the first element is '
253+
f'pred_score = {pred_score[0]:.2f}, '
254+
f'pred_label = {pred_label[0]} '
255+
f'and pred_class = {pred_class[0]}. '
256+
'Specify --out to save all results to files.')
251257
if out:
252-
print(f'\nwriting results to {out}')
258+
logger.debug(f'writing results to {out}')
253259
mmcv.dump(results, out)
254260

255261
def get_preprocess(self) -> Dict:

mmdeploy/codebase/mmdet/deploy/object_detection.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from mmcv.parallel import DataContainer
88
from torch.utils.data import Dataset
99

10-
from mmdeploy.utils import Task, get_root_logger
10+
from mmdeploy.utils import Task
1111
from mmdeploy.utils.config_utils import get_input_shape, is_dynamic_shape
1212
from ...base import BaseTask
1313
from .mmdetection import MMDET_TASK
@@ -235,7 +235,8 @@ def evaluate_outputs(model_cfg: mmcv.Config,
235235
metrics: Optional[str] = None,
236236
out: Optional[str] = None,
237237
metric_options: Optional[dict] = None,
238-
format_only: bool = False):
238+
format_only: bool = False,
239+
log_file: Optional[str] = None):
239240
"""Perform post-processing to predictions of model.
240241
241242
Args:
@@ -252,10 +253,14 @@ def evaluate_outputs(model_cfg: mmcv.Config,
252253
evaluation. It is useful when you want to format the result
253254
to a specific format and submit it to the test server. Defaults
254255
to `False`.
256+
log_file (str | None): The file to write the evaluation results.
257+
Defaults to `None` and the results will only print on stdout.
255258
"""
259+
from mmcv.utils import get_logger
260+
logger = get_logger('test', log_file=log_file)
261+
256262
if out:
257-
logger = get_root_logger()
258-
logger.info(f'\nwriting results to {out}')
263+
logger.debug(f'writing results to {out}')
259264
mmcv.dump(outputs, out)
260265
kwargs = {} if metric_options is None else metric_options
261266
if format_only:
@@ -269,7 +274,7 @@ def evaluate_outputs(model_cfg: mmcv.Config,
269274
]:
270275
eval_kwargs.pop(key, None)
271276
eval_kwargs.update(dict(metric=metrics, **kwargs))
272-
print(dataset.evaluate(outputs, **eval_kwargs))
277+
logger.info(dataset.evaluate(outputs, **eval_kwargs))
273278

274279
def get_preprocess(self) -> Dict:
275280
"""Get the preprocess information for SDK.

mmdeploy/codebase/mmedit/deploy/super_resolution.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
from mmdeploy.codebase.base import BaseTask
1212
from mmdeploy.codebase.mmedit.deploy.mmediting import MMEDIT_TASK
13-
from mmdeploy.utils import Task, get_input_shape, get_root_logger, load_config
13+
from mmdeploy.utils import Task, get_input_shape, load_config
1414

1515

1616
def process_model_config(model_cfg: mmcv.Config,
@@ -249,6 +249,7 @@ def evaluate_outputs(model_cfg,
249249
out: Optional[str] = None,
250250
metric_options: Optional[dict] = None,
251251
format_only: bool = False,
252+
log_file: Optional[str] = None,
252253
**kwargs) -> None:
253254
"""Evaluation function implemented in mmedit.
254255
@@ -265,17 +266,20 @@ def evaluate_outputs(model_cfg,
265266
evaluation. It is useful when you want to format the result
266267
to a specific format and submit it to the test server. Defaults
267268
to `False`.
269+
log_file (str | None): The file to write the evaluation results.
270+
Defaults to `None` and the results will only print on stdout.
268271
"""
272+
from mmcv.utils import get_logger
273+
logger = get_logger('test', log_file=log_file)
274+
269275
if out:
270-
logger = get_root_logger()
271-
logger.info(f'\nwriting results to {out}')
276+
logger.debug(f'writing results to {out}')
272277
mmcv.dump(outputs, out)
273278
# The Dataset doesn't need metrics
274-
print('\n')
275279
# print metrics
276280
stats = dataset.evaluate(outputs)
277281
for stat in stats:
278-
print('Eval-{}: {}'.format(stat, stats[stat]))
282+
logger.info('Eval-{}: {}'.format(stat, stats[stat]))
279283

280284
def get_preprocess(self) -> Dict:
281285
"""Get the preprocess information for SDK.

mmdeploy/codebase/mmocr/deploy/text_detection.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from torch.utils.data import Dataset
1111

1212
from mmdeploy.codebase.base import BaseTask
13-
from mmdeploy.utils import Task, get_input_shape, get_root_logger
13+
from mmdeploy.utils import Task, get_input_shape
1414
from .mmocr import MMOCR_TASK
1515

1616

@@ -246,15 +246,16 @@ def evaluate_outputs(model_cfg,
246246
metrics: Optional[str] = None,
247247
out: Optional[str] = None,
248248
metric_options: Optional[dict] = None,
249-
format_only: bool = False):
249+
format_only: bool = False,
250+
log_file: Optional[str] = None):
250251
"""Perform post-processing to predictions of model.
251252
252253
Args:
253254
outputs (Sequence): A list of predictions of model inference.
254255
dataset (Dataset): Input dataset to run test.
255256
model_cfg (mmcv.Config): The model config.
256257
metrics (str): Evaluation metrics, which depends on
257-
the codebase and the dataset, e.g., e.g., "acc" for text
258+
the codebase and the dataset, e.g., "acc" for text
258259
recognition, and "hmean-iou" for text detection.
259260
out (str): Output result file in pickle format, defaults to `None`.
260261
metric_options (dict): Custom options for evaluation, will be
@@ -263,10 +264,14 @@ def evaluate_outputs(model_cfg,
263264
evaluation. It is useful when you want to format the result
264265
to a specific format and submit it to the test server. Defaults
265266
to `False`.
267+
log_file (str | None): The file to write the evaluation results.
268+
Defaults to `None` and the results will only print on stdout.
266269
"""
270+
from mmcv.utils import get_logger
271+
logger = get_logger('test', log_file=log_file)
272+
267273
if out:
268-
logger = get_root_logger()
269-
logger.info(f'\nwriting results to {out}')
274+
logger.debug(f'writing results to {out}')
270275
mmcv.dump(outputs, out)
271276
kwargs = {} if metric_options is None else metric_options
272277
if format_only:
@@ -280,7 +285,7 @@ def evaluate_outputs(model_cfg,
280285
]:
281286
eval_kwargs.pop(key, None)
282287
eval_kwargs.update(dict(metric=metrics, **kwargs))
283-
print(dataset.evaluate(outputs, **eval_kwargs))
288+
logger.info(dataset.evaluate(outputs, **eval_kwargs))
284289

285290
def get_preprocess(self) -> Dict:
286291
"""Get the preprocess information for SDK.

mmdeploy/codebase/mmocr/deploy/text_recognition.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from torch.utils.data import Dataset
1111

1212
from mmdeploy.codebase.base import BaseTask
13-
from mmdeploy.utils import Task, get_input_shape, get_root_logger
13+
from mmdeploy.utils import Task, get_input_shape
1414
from .mmocr import MMOCR_TASK
1515

1616

@@ -259,15 +259,16 @@ def evaluate_outputs(model_cfg: mmcv.Config,
259259
metrics: Optional[str] = None,
260260
out: Optional[str] = None,
261261
metric_options: Optional[dict] = None,
262-
format_only: bool = False):
262+
format_only: bool = False,
263+
log_file: Optional[str] = None):
263264
"""Perform post-processing to predictions of model.
264265
265266
Args:
266267
model_cfg (mmcv.Config): The model config.
267268
outputs (list): A list of predictions of model inference.
268269
dataset (Dataset): Input dataset to run test.
269270
metrics (str): Evaluation metrics, which depends on
270-
the codebase and the dataset, e.g., e.g., "acc" for text
271+
the codebase and the dataset, e.g., "acc" for text
271272
recognition, and "hmean-iou" for text detection.
272273
out (str): Output result file in pickle format, defaults to `None`.
273274
metric_options (dict): Custom options for evaluation, will be
@@ -276,10 +277,14 @@ def evaluate_outputs(model_cfg: mmcv.Config,
276277
evaluation. It is useful when you want to format the result
277278
to a specific format and submit it to the test server. Defaults
278279
to `False`.
280+
log_file (str | None): The file to write the evaluation results.
281+
Defaults to `None` and the results will only print on stdout.
279282
"""
283+
from mmcv.utils import get_logger
284+
logger = get_logger('test', log_file=log_file)
285+
280286
if out:
281-
logger = get_root_logger()
282-
logger.info(f'\nwriting results to {out}')
287+
logger.debug(f'writing results to {out}')
283288
mmcv.dump(outputs, out)
284289
kwargs = {} if metric_options is None else metric_options
285290
if format_only:
@@ -293,7 +298,7 @@ def evaluate_outputs(model_cfg: mmcv.Config,
293298
]:
294299
eval_kwargs.pop(key, None)
295300
eval_kwargs.update(dict(metric=metrics, **kwargs))
296-
print(dataset.evaluate(outputs, **eval_kwargs))
301+
logger.info(dataset.evaluate(outputs, **eval_kwargs))
297302

298303
def get_preprocess(self) -> Dict:
299304
"""Get the preprocess information for SDK.

mmdeploy/codebase/mmseg/deploy/segmentation.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from torch.utils.data import Dataset
88

99
from mmdeploy.codebase.base import BaseTask
10-
from mmdeploy.utils import Task, get_input_shape, get_root_logger
10+
from mmdeploy.utils import Task, get_input_shape
1111
from .mmsegmentation import MMSEG_TASK
1212

1313

@@ -205,7 +205,8 @@ def evaluate_outputs(model_cfg,
205205
metrics: Optional[str] = None,
206206
out: Optional[str] = None,
207207
metric_options: Optional[dict] = None,
208-
format_only: bool = False):
208+
format_only: bool = False,
209+
log_file: Optional[str] = None):
209210
"""Perform post-processing to predictions of model.
210211
211212
Args:
@@ -222,16 +223,20 @@ def evaluate_outputs(model_cfg,
222223
evaluation. It is useful when you want to format the result
223224
to a specific format and submit it to the test server. Defaults
224225
to `False`.
226+
log_file (str | None): The file to write the evaluation results.
227+
Defaults to `None` and the results will only print on stdout.
225228
"""
229+
from mmcv.utils import get_logger
230+
logger = get_logger('test', log_file=log_file)
231+
226232
if out:
227-
logger = get_root_logger()
228-
logger.info(f'\nwriting results to {out}')
233+
logger.debug(f'writing results to {out}')
229234
mmcv.dump(outputs, out)
230235
kwargs = {} if metric_options is None else metric_options
231236
if format_only:
232237
dataset.format_results(outputs, **kwargs)
233238
if metrics:
234-
dataset.evaluate(outputs, metrics, **kwargs)
239+
dataset.evaluate(outputs, metrics, logger=logger, **kwargs)
235240

236241
def get_preprocess(self) -> Dict:
237242
"""Get the preprocess information for SDK.

0 commit comments

Comments (0)