Merge pull request #2659 from openvinotoolkit/mergeback/140to150

yunchu · web-flow · commit a49d58f3ab16 · 2023-11-21T17:39:00.000+09:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -44,6 +44,7 @@ All notable changes to this project will be documented in this file.
 - Update ModelAPI configuration(<https://github.com/openvinotoolkit/training_extensions/pull/2564>)
 - Add Anomaly modelAPI changes (<https://github.com/openvinotoolkit/training_extensions/pull/2563>)
 - Update Image numpy access (<https://github.com/openvinotoolkit/training_extensions/pull/2586>)
+- Make max_num_detections configurable (<https://github.com/openvinotoolkit/training_extensions/pull/2647>)
 
 ### Bug fixes
 
diff --git a/src/otx/algorithms/common/configs/training_base.py b/src/otx/algorithms/common/configs/training_base.py
@@ -1,18 +1,7 @@
 """Base Configuration of OTX Common Algorithms."""
 
-# Copyright (C) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions
-# and limitations under the License.
+# Copyright (C) 2022-2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
 
 from sys import maxsize
 
@@ -241,6 +230,16 @@ class BasePostprocessing(ParameterGroup):
             affects_outcome_of=ModelLifecycle.INFERENCE,
         )
 
+        max_num_detections = configurable_integer(
+            header="Maximum number of detections per image",
+            description="Extra detection outputs will be discarded in non-maximum suppression process. "
+            "Defaults to 0, which means per-model default value.",
+            default_value=0,
+            min_value=0,
+            max_value=10000,
+            affects_outcome_of=ModelLifecycle.INFERENCE,
+        )
+
         use_ellipse_shapes = configurable_boolean(
             default_value=False,
             header="Use ellipse shapes",
diff --git a/src/otx/algorithms/detection/adapters/mmdet/configurer.py b/src/otx/algorithms/detection/adapters/mmdet/configurer.py
@@ -37,6 +37,25 @@ def configure_model(self, cfg, data_classes, model_classes, ir_options, **kwargs
         """Configuration for model config."""
         super().configure_model(cfg, data_classes, model_classes, ir_options, **kwargs)
         self.configure_regularization(cfg)
+        self.configure_max_num_detections(cfg, kwargs.get("max_num_detections", 0))
+
+    def configure_max_num_detections(self, cfg, max_num_detections):
+        """Patch config for maximum number of detections."""
+        if max_num_detections > 0:
+            logger.info(f"Model max_num_detections: {max_num_detections}")
+            test_cfg = cfg.model.test_cfg
+            test_cfg.max_per_img = max_num_detections
+            test_cfg.nms_pre = max_num_detections * 10
+            # Special cases for 2-stage detectors (e.g. MaskRCNN)
+            if hasattr(test_cfg, "rpn"):
+                test_cfg.rpn.nms_pre = max_num_detections * 20
+                test_cfg.rpn.max_per_img = max_num_detections * 10
+            if hasattr(test_cfg, "rcnn"):
+                test_cfg.rcnn.max_per_img = max_num_detections
+            train_cfg = cfg.model.train_cfg
+            if hasattr(train_cfg, "rpn_proposal"):
+                train_cfg.rpn_proposal.nms_pre = max_num_detections * 20
+                train_cfg.rpn_proposal.max_per_img = max_num_detections * 10
 
     def configure_regularization(self, cfg):  # noqa: C901
         """Patch regularization parameters."""
diff --git a/src/otx/algorithms/detection/adapters/mmdet/task.py b/src/otx/algorithms/detection/adapters/mmdet/task.py
@@ -177,6 +177,7 @@ def configure(self, training=True, ir_options=None, train_dataset=None, export=F
             model_classes,
             self._input_size,
             train_dataset=train_dataset,
+            max_num_detections=self.max_num_detections,
         )
         if should_cluster_anchors(self._recipe_cfg):
             if train_dataset is not None:
@@ -485,6 +486,12 @@ def _export_model(
         assert len(self._precision) == 1
         export_options["precision"] = str(self._precision[0])
         export_options["type"] = str(export_format)
+        if self.max_num_detections > 0:
+            logger.info(f"Export max_num_detections: {self.max_num_detections}")
+            post_proc_cfg = export_options["deploy_cfg"]["codebase_config"]["post_processing"]
+            post_proc_cfg["max_output_boxes_per_class"] = self.max_num_detections
+            post_proc_cfg["keep_top_k"] = self.max_num_detections
+            post_proc_cfg["pre_top_k"] = self.max_num_detections * 10
 
         export_options["deploy_cfg"]["dump_features"] = dump_features
         if dump_features:
diff --git a/src/otx/algorithms/detection/adapters/openvino/task.py b/src/otx/algorithms/detection/adapters/openvino/task.py
@@ -1,18 +1,7 @@
 """Openvino Task of Detection."""
 
-# Copyright (C) 2021 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions
-# and limitations under the License.
+# Copyright (C) 2021-2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
 
 import copy
 import io
diff --git a/src/otx/algorithms/detection/configs/base/configuration.py b/src/otx/algorithms/detection/configs/base/configuration.py
@@ -1,18 +1,7 @@
 """Configuration file of OTX Detection."""
 
-# Copyright (C) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions
-# and limitations under the License.
+# Copyright (C) 2022-2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
 
 from attr import attrs
 
diff --git a/src/otx/algorithms/detection/configs/detection/configuration.yaml b/src/otx/algorithms/detection/configs/detection/configuration.yaml
@@ -294,6 +294,25 @@ postprocessing:
     value: 0.01
     visible_in_ui: true
     warning: null
+  max_num_detections:
+    affects_outcome_of: INFERENCE
+    default_value: 0
+    description:
+      Extra detection outputs will be discarded in non-maximum suppression process.
+      Defaults to 0, which means per-model default values.
+    editable: true
+    header: Maximum number of detections per image
+    max_value: 10000
+    min_value: 0
+    type: INTEGER
+    ui_rules:
+      action: DISABLE_EDITING
+      operator: AND
+      rules: []
+      type: UI_RULES
+    value: 0
+    visible_in_ui: true
+    warning: null
   use_ellipse_shapes:
     affects_outcome_of: INFERENCE
     default_value: false
diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/configuration.yaml b/src/otx/algorithms/detection/configs/instance_segmentation/configuration.yaml
@@ -294,6 +294,25 @@ postprocessing:
     value: 0.01
     visible_in_ui: true
     warning: null
+  max_num_detections:
+    affects_outcome_of: INFERENCE
+    default_value: 0
+    description:
+      Extra detection outputs will be discarded in non-maximum suppression process.
+      Defaults to 0, which means per-model default values.
+    editable: true
+    header: Maximum number of detections per image
+    max_value: 10000
+    min_value: 0
+    type: INTEGER
+    ui_rules:
+      action: DISABLE_EDITING
+      operator: AND
+      rules: []
+      type: UI_RULES
+    value: 0
+    visible_in_ui: true
+    warning: null
   use_ellipse_shapes:
     affects_outcome_of: INFERENCE
     default_value: false
diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/convnext_maskrcnn/model.py b/src/otx/algorithms/detection/configs/instance_segmentation/convnext_maskrcnn/model.py
@@ -115,9 +115,7 @@
             nms=dict(type="nms", iou_threshold=0.7),
             min_bbox_size=0,
         ),
-        rcnn=dict(
-            score_thr=0.05, nms=dict(type="nms", iou_threshold=0.5, max_num=100), max_per_img=100, mask_thr_binary=0.5
-        ),
+        rcnn=dict(score_thr=0.05, nms=dict(type="nms", iou_threshold=0.5), max_per_img=100, mask_thr_binary=0.5),
     ),
 )
 
diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/model.py b/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/model.py
@@ -1,18 +1,7 @@
 """Model configuration of Resnet50-MaskRCNN model for Instance-Seg Task."""
 
-# Copyright (C) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions
-# and limitations under the License.
+# Copyright (C) 2022-2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
 
 # pylint: disable=invalid-name
 
@@ -149,7 +138,7 @@
         ),
         rcnn=dict(
             score_thr=0.05,
-            nms=dict(type="nms", iou_threshold=0.5, max_num=100),
+            nms=dict(type="nms", iou_threshold=0.5),
             max_per_img=100,
             mask_thr_binary=0.5,
         ),
diff --git a/src/otx/algorithms/detection/configs/rotated_detection/configuration.yaml b/src/otx/algorithms/detection/configs/rotated_detection/configuration.yaml
@@ -313,6 +313,25 @@ postprocessing:
     warning: null
   type: PARAMETER_GROUP
   visible_in_ui: true
+  max_num_detections:
+    affects_outcome_of: INFERENCE
+    default_value: 0
+    description:
+      Extra detection outputs will be discarded in non-maximum suppression process.
+      Defaults to 0, which means per-model default values.
+    editable: true
+    header: Maximum number of detections per image
+    max_value: 10000
+    min_value: 0
+    type: INTEGER
+    ui_rules:
+      action: DISABLE_EDITING
+      operator: AND
+      rules: []
+      type: UI_RULES
+    value: 0
+    visible_in_ui: true
+    warning: null
 algo_backend:
   description: parameters for algo backend
   header: Algo backend parameters
diff --git a/src/otx/algorithms/detection/configs/rotated_detection/resnet50_maskrcnn/model.py b/src/otx/algorithms/detection/configs/rotated_detection/resnet50_maskrcnn/model.py
@@ -139,7 +139,7 @@
         ),
         rcnn=dict(
             score_thr=0.05,
-            nms=dict(type="nms", iou_threshold=0.5, max_num=100),
+            nms=dict(type="nms", iou_threshold=0.5),
             max_per_img=100,
             mask_thr_binary=0.5,
         ),
diff --git a/src/otx/algorithms/detection/task.py b/src/otx/algorithms/detection/task.py
@@ -75,14 +75,13 @@ def __init__(self, task_environment: TaskEnvironment, output_path: Optional[str]
         )
         self._anchors: Dict[str, int] = {}
 
-        if (
-            hasattr(self._hyperparams, "postprocessing")
-            and not getattr(self._hyperparams.postprocessing, "result_based_confidence_threshold", False)
-            and hasattr(self._hyperparams.postprocessing, "confidence_threshold")
-        ):
-            self.confidence_threshold = self._hyperparams.postprocessing.confidence_threshold
-        else:
-            self.confidence_threshold = 0.0
+        self.confidence_threshold = 0.0
+        self.max_num_detections = 0
+        if hasattr(self._hyperparams, "postprocessing"):
+            if hasattr(self._hyperparams.postprocessing, "confidence_threshold"):
+                self.confidence_threshold = self._hyperparams.postprocessing.confidence_threshold
+            if hasattr(self._hyperparams.postprocessing, "max_num_detections"):
+                self.max_num_detections = self._hyperparams.postprocessing.max_num_detections
 
         if task_environment.model is not None:
             self._load_model()
@@ -115,6 +114,12 @@ def _load_postprocessing(self, model_data):
             hparams.use_ellipse_shapes = loaded_postprocessing["use_ellipse_shapes"]["value"]
         else:
             hparams.use_ellipse_shapes = False
+        if "max_num_detections" in loaded_postprocessing:
+            trained_max_num_detections = loaded_postprocessing["max_num_detections"]["value"]
+            # Prefer a new hparam value intentionally set by the user (>0) over the trained value
+            if self.max_num_detections == 0:
+                self.max_num_detections = trained_max_num_detections
+
         # If confidence threshold is adaptive then up-to-date value should be stored in the model
         # and should not be changed during inference. Otherwise user-specified value should be taken.
         if hparams.result_based_confidence_threshold:
diff --git a/tests/integration/cli/detection/test_detection.py b/tests/integration/cli/detection/test_detection.py
@@ -42,6 +42,8 @@
         "1",
         "--learning_parameters.batch_size",
         "4",
+        "--postprocessing.max_num_detections",
+        "200",
     ],
 }
 
diff --git a/tests/integration/cli/detection/test_tiling_detection.py b/tests/integration/cli/detection/test_tiling_detection.py
@@ -1,5 +1,5 @@
 """Tests for OTX Class-Incremental Learning for object detection with OTX CLI"""
-# Copyright (C) 2022 Intel Corporation
+# Copyright (C) 2022-2023 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 import os
@@ -36,6 +36,8 @@
         "1",
         "--tiling_parameters.enable_adaptive_params",
         "1",
+        "--postprocessing.max_num_detections",
+        "200",
     ],
 }
 
diff --git a/tests/integration/cli/instance_segmentation/test_instance_segmentation.py b/tests/integration/cli/instance_segmentation/test_instance_segmentation.py
@@ -39,6 +39,8 @@
         "1",
         "--learning_parameters.batch_size",
         "2",
+        "--postprocessing.max_num_detections",
+        "200",
     ],
 }
 
@@ -54,6 +56,8 @@
         "1",
         "--learning_parameters.batch_size",
         "2",
+        "--postprocessing.max_num_detections",
+        "200",
     ],
 }
 
@@ -64,6 +68,8 @@
     "2",
     "--learning_parameters.batch_size",
     "2",
+    "--postprocessing.max_num_detections",
+    "200",
 ]
 
 otx_dir = os.getcwd()
diff --git a/tests/integration/cli/instance_segmentation/test_tiling_instseg.py b/tests/integration/cli/instance_segmentation/test_tiling_instseg.py
@@ -1,5 +1,5 @@
 """Tests for OTX Class-Incremental Learning for instance segmentation with OTX CLI"""
-# Copyright (C) 2022-2023 Intel Corporation
+# Copyright (C) 2022-2023 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 import copy
@@ -42,6 +42,8 @@
         "1",
         "--tiling_parameters.enable_adaptive_params",
         "1",
+        "--postprocessing.max_num_detections",
+        "200",
     ],
 }
 
diff --git a/tests/unit/algorithms/detection/adapters/mmdet/test_configurer.py b/tests/unit/algorithms/detection/adapters/mmdet/test_configurer.py