Skip to content

Commit 52221e3

Browse files
authored
Fix keypoint detection single obj recipe (#3915)
* add rtmpose_tiny recipe for single-object keypoint detection
* modify test subset name
* fix unit test
* add property for PCK input size
1 parent 4c8555e commit 52221e3

File tree

5 files changed

+153
-55
lines changed

5 files changed

+153
-55
lines changed

src/otx/core/metrics/pck.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,22 @@ def __init__(
147147
self.label_info: LabelInfo = label_info
148148
self.reset()
149149

150+
@property
151+
def input_size(self) -> tuple[int, int]:
152+
"""Getter for input_size."""
153+
return self._input_size
154+
155+
@input_size.setter
156+
def input_size(self, size: tuple[int, int]) -> None:
157+
"""Setter for input_size."""
158+
if not isinstance(size, tuple) or len(size) != 2:
159+
msg = "input_size must be a tuple of two integers."
160+
raise ValueError(msg)
161+
if not all(isinstance(dim, int) for dim in size):
162+
msg = "input_size dimensions must be integers."
163+
raise ValueError(msg)
164+
self._input_size = size
165+
150166
def reset(self) -> None:
151167
"""Reset for every validation and test epoch.
152168
@@ -177,7 +193,7 @@ def compute(self) -> dict:
177193
gt_kpts = np.stack([p[0] for p in self.targets])
178194
kpts_visible = np.stack([p[1] for p in self.targets])
179195

180-
normalize = np.tile(np.array([[256, 192]]), (pred_kpts.shape[0], 1))
196+
normalize = np.tile(np.array([self.input_size]), (pred_kpts.shape[0], 1))
181197
_, avg_acc, _ = keypoint_pck_accuracy(
182198
pred_kpts,
183199
gt_kpts,

src/otx/core/model/keypoint_detection.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,11 @@ def _customize_outputs(
104104
bbox_info=[],
105105
)
106106

107+
def configure_metric(self) -> None:
108+
"""Configure the metric."""
109+
super().configure_metric()
110+
self._metric.input_size = self.input_size
111+
107112
def _convert_pred_entity_to_compute_metric(
108113
self,
109114
preds: KeypointDetBatchPredEntity,

src/otx/recipe/keypoint_detection/rtmpose_tiny.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ model:
66
optimizer:
77
class_path: torch.optim.AdamW
88
init_args:
9-
lr: 0.004
9+
lr: 0.001
1010
weight_decay: 0.0001
1111

1212
scheduler:

src/otx/recipe/keypoint_detection/rtmpose_tiny_single_obj.yaml

Lines changed: 49 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@ model:
22
class_path: otx.algo.keypoint_detection.rtmpose.RTMPoseTiny
33
init_args:
44
label_info: 17
5+
input_size:
6+
- 512
7+
- 512
58

69
optimizer:
710
class_path: torch.optim.AdamW
@@ -35,47 +38,49 @@ overrides:
3538
- data.train_subset.transforms
3639
- data.val_subset.transforms
3740
- data.test_subset.transforms
38-
input_size:
39-
- 512
40-
- 512
41-
train_subset:
42-
transforms:
43-
- class_path: otx.core.data.transform_libs.torchvision.TopdownAffine
44-
init_args:
45-
input_size: $(input_size)
46-
- class_path: otx.core.data.transform_libs.torchvision.YOLOXHSVRandomAug
47-
init_args:
48-
is_numpy_to_tvtensor: true
49-
- class_path: torchvision.transforms.v2.ToDtype
50-
init_args:
51-
dtype: ${as_torch_dtype:torch.float32}
52-
- class_path: torchvision.transforms.v2.Normalize
53-
init_args:
54-
mean: [123.675, 116.28, 103.53]
55-
std: [58.395, 57.12, 57.375]
56-
val_subset:
57-
transforms:
58-
- class_path: otx.core.data.transform_libs.torchvision.TopdownAffine
59-
init_args:
60-
input_size: $(input_size)
61-
is_numpy_to_tvtensor: true
62-
- class_path: torchvision.transforms.v2.ToDtype
63-
init_args:
64-
dtype: ${as_torch_dtype:torch.float32}
65-
- class_path: torchvision.transforms.v2.Normalize
66-
init_args:
67-
mean: [123.675, 116.28, 103.53]
68-
std: [58.395, 57.12, 57.375]
69-
test_subset:
70-
transforms:
71-
- class_path: otx.core.data.transform_libs.torchvision.TopdownAffine
72-
init_args:
73-
input_size: $(input_size)
74-
is_numpy_to_tvtensor: true
75-
- class_path: torchvision.transforms.v2.ToDtype
76-
init_args:
77-
dtype: ${as_torch_dtype:torch.float32}
78-
- class_path: torchvision.transforms.v2.Normalize
79-
init_args:
80-
mean: [123.675, 116.28, 103.53]
81-
std: [58.395, 57.12, 57.375]
41+
data:
42+
input_size:
43+
- 512
44+
- 512
45+
train_subset:
46+
transforms:
47+
- class_path: otx.core.data.transform_libs.torchvision.RandomBBoxTransform
48+
- class_path: otx.core.data.transform_libs.torchvision.TopdownAffine
49+
init_args:
50+
input_size: $(input_size)
51+
- class_path: otx.core.data.transform_libs.torchvision.YOLOXHSVRandomAug
52+
init_args:
53+
is_numpy_to_tvtensor: true
54+
- class_path: torchvision.transforms.v2.ToDtype
55+
init_args:
56+
dtype: ${as_torch_dtype:torch.float32}
57+
- class_path: torchvision.transforms.v2.Normalize
58+
init_args:
59+
mean: [123.675, 116.28, 103.53]
60+
std: [58.395, 57.12, 57.375]
61+
val_subset:
62+
transforms:
63+
- class_path: otx.core.data.transform_libs.torchvision.TopdownAffine
64+
init_args:
65+
input_size: $(input_size)
66+
is_numpy_to_tvtensor: true
67+
- class_path: torchvision.transforms.v2.ToDtype
68+
init_args:
69+
dtype: ${as_torch_dtype:torch.float32}
70+
- class_path: torchvision.transforms.v2.Normalize
71+
init_args:
72+
mean: [123.675, 116.28, 103.53]
73+
std: [58.395, 57.12, 57.375]
74+
test_subset:
75+
transforms:
76+
- class_path: otx.core.data.transform_libs.torchvision.TopdownAffine
77+
init_args:
78+
input_size: $(input_size)
79+
is_numpy_to_tvtensor: true
80+
- class_path: torchvision.transforms.v2.ToDtype
81+
init_args:
82+
dtype: ${as_torch_dtype:torch.float32}
83+
- class_path: torchvision.transforms.v2.Normalize
84+
init_args:
85+
mean: [123.675, 116.28, 103.53]
86+
std: [58.395, 57.12, 57.375]

tests/perf/test_keypoint_detection.py

Lines changed: 81 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from __future__ import annotations
66

77
from pathlib import Path
8+
from typing import ClassVar
89

910
import pytest
1011

@@ -19,26 +20,97 @@ class TestPerfKeypointDetection(PerfTestBase):
1920
Benchmark.Model(task="keypoint_detection", name="rtmpose_tiny", category="speed"),
2021
]
2122

22-
DATASET_TEST_CASES = [
23+
DATASET_TEST_CASES: ClassVar = [
2324
Benchmark.Dataset(
24-
name=f"coco_person_keypoint_small_{idx}",
25-
path=Path("keypoint_detection/coco_keypoint_small") / f"{idx}",
25+
name="coco_person_keypoint_small",
26+
path=Path("keypoint_detection/coco_keypoint/small"),
2627
group="small",
2728
num_repeat=5,
2829
extra_overrides={},
29-
)
30-
for idx in (1, 2, 3)
31-
] + [
30+
),
3231
Benchmark.Dataset(
3332
name="coco_person_keypoint_medium",
34-
path=Path("keypoint_detection/coco_keypoint_medium"),
33+
path=Path("keypoint_detection/coco_keypoint/medium"),
34+
group="medium",
35+
num_repeat=5,
36+
extra_overrides={},
37+
),
38+
Benchmark.Dataset(
39+
name="coco_person_keypoint_large",
40+
path=Path("keypoint_detection/coco_keypoint/large"),
41+
group="large",
42+
num_repeat=5,
43+
extra_overrides={},
44+
),
45+
]
46+
47+
BENCHMARK_CRITERIA = [ # noqa: RUF012
48+
Benchmark.Criterion(name="train/epoch", summary="max", compare="<", margin=0.1),
49+
Benchmark.Criterion(name="train/e2e_time", summary="max", compare="<", margin=0.1),
50+
Benchmark.Criterion(name="val/accuracy", summary="max", compare=">", margin=0.1),
51+
Benchmark.Criterion(name="test/accuracy", summary="max", compare=">", margin=0.1),
52+
Benchmark.Criterion(name="export/accuracy", summary="max", compare=">", margin=0.1),
53+
Benchmark.Criterion(name="optimize/accuracy", summary="max", compare=">", margin=0.1),
54+
Benchmark.Criterion(name="train/iter_time", summary="mean", compare="<", margin=0.1),
55+
Benchmark.Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
56+
Benchmark.Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
57+
Benchmark.Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
58+
Benchmark.Criterion(name="test(train)/e2e_time", summary="max", compare=">", margin=0.1),
59+
Benchmark.Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
60+
Benchmark.Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
61+
]
62+
63+
@pytest.mark.parametrize(
64+
"fxt_model",
65+
MODEL_TEST_CASES,
66+
ids=lambda model: model.name,
67+
indirect=True,
68+
)
69+
@pytest.mark.parametrize(
70+
"fxt_dataset",
71+
DATASET_TEST_CASES,
72+
ids=lambda dataset: dataset.name,
73+
indirect=True,
74+
)
75+
def test_perf(
76+
self,
77+
fxt_model: Benchmark.Model,
78+
fxt_dataset: Benchmark.Dataset,
79+
fxt_benchmark: Benchmark,
80+
):
81+
self._test_perf(
82+
model=fxt_model,
83+
dataset=fxt_dataset,
84+
benchmark=fxt_benchmark,
85+
criteria=self.BENCHMARK_CRITERIA,
86+
)
87+
88+
89+
class TestPerfKeypointDetectionSingleObj(PerfTestBase):
90+
"""Benchmark visual prompting."""
91+
92+
MODEL_TEST_CASES = [ # noqa: RUF012
93+
Benchmark.Model(task="keypoint_detection", name="rtmpose_tiny_single_obj", category="speed"),
94+
]
95+
96+
DATASET_TEST_CASES: ClassVar = [
97+
Benchmark.Dataset(
98+
name="coco_person_keypoint_single_obj_small",
99+
path=Path("keypoint_detection/coco_keypoint_single_obj/small"),
100+
group="small",
101+
num_repeat=5,
102+
extra_overrides={},
103+
),
104+
Benchmark.Dataset(
105+
name="coco_person_keypoint_single_obj_medium",
106+
path=Path("keypoint_detection/coco_keypoint_single_obj/medium"),
35107
group="medium",
36108
num_repeat=5,
37109
extra_overrides={},
38110
),
39111
Benchmark.Dataset(
40-
name="mpii_large",
41-
path=Path("keypoint_detection/mpii_large"),
112+
name="coco_person_keypoint_single_obj_large",
113+
path=Path("keypoint_detection/coco_keypoint_single_obj/large"),
42114
group="large",
43115
num_repeat=5,
44116
extra_overrides={},

0 commit comments

Comments (0)