Skip to content

Commit 52221e3

Browse files
authored
Fix keypoint detection single obj recipe (#3915)
* add rtmpose_tiny recipe for single-object keypoint detection
* modify test subset name
* fix unit test
* add property for PCK input size
1 parent 4c8555e commit 52221e3

File tree

5 files changed

+153
-55
lines changed

5 files changed

+153
-55
lines changed

src/otx/core/metrics/pck.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,22 @@ def __init__(
147147
self.label_info: LabelInfo = label_info
148148
self.reset()
149149

150+
@property
151+
def input_size(self) -> tuple[int, int]:
152+
"""Getter for input_size."""
153+
return self._input_size
154+
155+
@input_size.setter
156+
def input_size(self, size: tuple[int, int]) -> None:
157+
"""Setter for input_size."""
158+
if not isinstance(size, tuple) or len(size) != 2:
159+
msg = "input_size must be a tuple of two integers."
160+
raise ValueError(msg)
161+
if not all(isinstance(dim, int) for dim in size):
162+
msg = "input_size dimensions must be integers."
163+
raise ValueError(msg)
164+
self._input_size = size
165+
150166
def reset(self) -> None:
151167
"""Reset for every validation and test epoch.
152168
@@ -177,7 +193,7 @@ def compute(self) -> dict:
177193
gt_kpts = np.stack([p[0] for p in self.targets])
178194
kpts_visible = np.stack([p[1] for p in self.targets])
179195

180-
normalize = np.tile(np.array([[256, 192]]), (pred_kpts.shape[0], 1))
196+
normalize = np.tile(np.array([self.input_size]), (pred_kpts.shape[0], 1))
181197
_, avg_acc, _ = keypoint_pck_accuracy(
182198
pred_kpts,
183199
gt_kpts,

src/otx/core/model/keypoint_detection.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,11 @@ def _customize_outputs(
104104
bbox_info=[],
105105
)
106106

107+
def configure_metric(self) -> None:
108+
"""Configure the metric."""
109+
super().configure_metric()
110+
self._metric.input_size = self.input_size
111+
107112
def _convert_pred_entity_to_compute_metric(
108113
self,
109114
preds: KeypointDetBatchPredEntity,

src/otx/recipe/keypoint_detection/rtmpose_tiny.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ model:
66
optimizer:
77
class_path: torch.optim.AdamW
88
init_args:
9-
lr: 0.004
9+
lr: 0.001
1010
weight_decay: 0.0001
1111

1212
scheduler:

src/otx/recipe/keypoint_detection/rtmpose_tiny_single_obj.yaml

Lines changed: 49 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@ model:
22
class_path: otx.algo.keypoint_detection.rtmpose.RTMPoseTiny
33
init_args:
44
label_info: 17
5+
input_size:
6+
- 512
7+
- 512
58

69
optimizer:
710
class_path: torch.optim.AdamW
@@ -35,47 +38,49 @@ overrides:
3538
- data.train_subset.transforms
3639
- data.val_subset.transforms
3740
- data.test_subset.transforms
38-
input_size:
39-
- 512
40-
- 512
41-
train_subset:
42-
transforms:
43-
- class_path: otx.core.data.transform_libs.torchvision.TopdownAffine
44-
init_args:
45-
input_size: $(input_size)
46-
- class_path: otx.core.data.transform_libs.torchvision.YOLOXHSVRandomAug
47-
init_args:
48-
is_numpy_to_tvtensor: true
49-
- class_path: torchvision.transforms.v2.ToDtype
50-
init_args:
51-
dtype: ${as_torch_dtype:torch.float32}
52-
- class_path: torchvision.transforms.v2.Normalize
53-
init_args:
54-
mean: [123.675, 116.28, 103.53]
55-
std: [58.395, 57.12, 57.375]
56-
val_subset:
57-
transforms:
58-
- class_path: otx.core.data.transform_libs.torchvision.TopdownAffine
59-
init_args:
60-
input_size: $(input_size)
61-
is_numpy_to_tvtensor: true
62-
- class_path: torchvision.transforms.v2.ToDtype
63-
init_args:
64-
dtype: ${as_torch_dtype:torch.float32}
65-
- class_path: torchvision.transforms.v2.Normalize
66-
init_args:
67-
mean: [123.675, 116.28, 103.53]
68-
std: [58.395, 57.12, 57.375]
69-
test_subset:
70-
transforms:
71-
- class_path: otx.core.data.transform_libs.torchvision.TopdownAffine
72-
init_args:
73-
input_size: $(input_size)
74-
is_numpy_to_tvtensor: true
75-
- class_path: torchvision.transforms.v2.ToDtype
76-
init_args:
77-
dtype: ${as_torch_dtype:torch.float32}
78-
- class_path: torchvision.transforms.v2.Normalize
79-
init_args:
80-
mean: [123.675, 116.28, 103.53]
81-
std: [58.395, 57.12, 57.375]
41+
data:
42+
input_size:
43+
- 512
44+
- 512
45+
train_subset:
46+
transforms:
47+
- class_path: otx.core.data.transform_libs.torchvision.RandomBBoxTransform
48+
- class_path: otx.core.data.transform_libs.torchvision.TopdownAffine
49+
init_args:
50+
input_size: $(input_size)
51+
- class_path: otx.core.data.transform_libs.torchvision.YOLOXHSVRandomAug
52+
init_args:
53+
is_numpy_to_tvtensor: true
54+
- class_path: torchvision.transforms.v2.ToDtype
55+
init_args:
56+
dtype: ${as_torch_dtype:torch.float32}
57+
- class_path: torchvision.transforms.v2.Normalize
58+
init_args:
59+
mean: [123.675, 116.28, 103.53]
60+
std: [58.395, 57.12, 57.375]
61+
val_subset:
62+
transforms:
63+
- class_path: otx.core.data.transform_libs.torchvision.TopdownAffine
64+
init_args:
65+
input_size: $(input_size)
66+
is_numpy_to_tvtensor: true
67+
- class_path: torchvision.transforms.v2.ToDtype
68+
init_args:
69+
dtype: ${as_torch_dtype:torch.float32}
70+
- class_path: torchvision.transforms.v2.Normalize
71+
init_args:
72+
mean: [123.675, 116.28, 103.53]
73+
std: [58.395, 57.12, 57.375]
74+
test_subset:
75+
transforms:
76+
- class_path: otx.core.data.transform_libs.torchvision.TopdownAffine
77+
init_args:
78+
input_size: $(input_size)
79+
is_numpy_to_tvtensor: true
80+
- class_path: torchvision.transforms.v2.ToDtype
81+
init_args:
82+
dtype: ${as_torch_dtype:torch.float32}
83+
- class_path: torchvision.transforms.v2.Normalize
84+
init_args:
85+
mean: [123.675, 116.28, 103.53]
86+
std: [58.395, 57.12, 57.375]

tests/perf/test_keypoint_detection.py

Lines changed: 81 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from __future__ import annotations
66

77
from pathlib import Path
8+
from typing import ClassVar
89

910
import pytest
1011

@@ -19,26 +20,97 @@ class TestPerfKeypointDetection(PerfTestBase):
1920
Benchmark.Model(task="keypoint_detection", name="rtmpose_tiny", category="speed"),
2021
]
2122

22-
DATASET_TEST_CASES = [
23+
DATASET_TEST_CASES: ClassVar = [
2324
Benchmark.Dataset(
24-
name=f"coco_person_keypoint_small_{idx}",
25-
path=Path("keypoint_detection/coco_keypoint_small") / f"{idx}",
25+
name="coco_person_keypoint_small",
26+
path=Path("keypoint_detection/coco_keypoint/small"),
2627
group="small",
2728
num_repeat=5,
2829
extra_overrides={},
29-
)
30-
for idx in (1, 2, 3)
31-
] + [
30+
),
3231
Benchmark.Dataset(
3332
name="coco_person_keypoint_medium",
34-
path=Path("keypoint_detection/coco_keypoint_medium"),
33+
path=Path("keypoint_detection/coco_keypoint/medium"),
34+
group="medium",
35+
num_repeat=5,
36+
extra_overrides={},
37+
),
38+
Benchmark.Dataset(
39+
name="coco_person_keypoint_large",
40+
path=Path("keypoint_detection/coco_keypoint/large"),
41+
group="large",
42+
num_repeat=5,
43+
extra_overrides={},
44+
),
45+
]
46+
47+
BENCHMARK_CRITERIA = [ # noqa: RUF012
48+
Benchmark.Criterion(name="train/epoch", summary="max", compare="<", margin=0.1),
49+
Benchmark.Criterion(name="train/e2e_time", summary="max", compare="<", margin=0.1),
50+
Benchmark.Criterion(name="val/accuracy", summary="max", compare=">", margin=0.1),
51+
Benchmark.Criterion(name="test/accuracy", summary="max", compare=">", margin=0.1),
52+
Benchmark.Criterion(name="export/accuracy", summary="max", compare=">", margin=0.1),
53+
Benchmark.Criterion(name="optimize/accuracy", summary="max", compare=">", margin=0.1),
54+
Benchmark.Criterion(name="train/iter_time", summary="mean", compare="<", margin=0.1),
55+
Benchmark.Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
56+
Benchmark.Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
57+
Benchmark.Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
58+
Benchmark.Criterion(name="test(train)/e2e_time", summary="max", compare=">", margin=0.1),
59+
Benchmark.Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
60+
Benchmark.Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
61+
]
62+
63+
@pytest.mark.parametrize(
64+
"fxt_model",
65+
MODEL_TEST_CASES,
66+
ids=lambda model: model.name,
67+
indirect=True,
68+
)
69+
@pytest.mark.parametrize(
70+
"fxt_dataset",
71+
DATASET_TEST_CASES,
72+
ids=lambda dataset: dataset.name,
73+
indirect=True,
74+
)
75+
def test_perf(
76+
self,
77+
fxt_model: Benchmark.Model,
78+
fxt_dataset: Benchmark.Dataset,
79+
fxt_benchmark: Benchmark,
80+
):
81+
self._test_perf(
82+
model=fxt_model,
83+
dataset=fxt_dataset,
84+
benchmark=fxt_benchmark,
85+
criteria=self.BENCHMARK_CRITERIA,
86+
)
87+
88+
89+
class TestPerfKeypointDetectionSingleObj(PerfTestBase):
90+
"""Benchmark visual prompting."""
91+
92+
MODEL_TEST_CASES = [ # noqa: RUF012
93+
Benchmark.Model(task="keypoint_detection", name="rtmpose_tiny_single_obj", category="speed"),
94+
]
95+
96+
DATASET_TEST_CASES: ClassVar = [
97+
Benchmark.Dataset(
98+
name="coco_person_keypoint_single_obj_small",
99+
path=Path("keypoint_detection/coco_keypoint_single_obj/small"),
100+
group="small",
101+
num_repeat=5,
102+
extra_overrides={},
103+
),
104+
Benchmark.Dataset(
105+
name="coco_person_keypoint_single_obj_medium",
106+
path=Path("keypoint_detection/coco_keypoint_single_obj/medium"),
35107
group="medium",
36108
num_repeat=5,
37109
extra_overrides={},
38110
),
39111
Benchmark.Dataset(
40-
name="mpii_large",
41-
path=Path("keypoint_detection/mpii_large"),
112+
name="coco_person_keypoint_single_obj_large",
113+
path=Path("keypoint_detection/coco_keypoint_single_obj/large"),
42114
group="large",
43115
num_repeat=5,
44116
extra_overrides={},

0 commit comments

Comments (0)