
Commit d0e864c

Add Dino head unit tests (#2344)
Recover DINO head unit tests
1 parent 66e8a67 commit d0e864c

File tree

2 files changed: +216 -0 lines changed
Lines changed: 4 additions & 0 deletions

@@ -0,0 +1,4 @@
"""Unit tests for src/otx/algorithms/detection/adapters/mmdet/models/heads."""
# Copyright (C) 2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
Lines changed: 212 additions & 0 deletions

@@ -0,0 +1,212 @@
"""Unit tests for CustomDINOHead."""
# Copyright (C) 2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#

import numpy as np
import pytest
import torch
from mmcv.utils import ConfigDict
from mmdet.core import build_assigner
from mmdet.models.builder import build_detector

from tests.test_suite.e2e_test_system import e2e_pytest_unit


class TestCustomDINOHead:
    @pytest.fixture(autouse=True)
    def setup(self):
        torch.manual_seed(5)
        cfg = ConfigDict(
            dict(
                type="CustomDINOHead",
                num_query=900,
                num_classes=80,
                in_channels=2048,
                sync_cls_avg_factor=True,
                with_box_refine=True,
                as_two_stage=True,
                transformer=dict(
                    type="CustomDINOTransformer",
                    encoder=dict(
                        type="DetrTransformerEncoder",
                        num_layers=6,
                        transformerlayers=dict(
                            type="BaseTransformerLayer",
                            attn_cfgs=dict(type="MultiScaleDeformableAttention", embed_dims=256, dropout=0.0),
                            feedforward_channels=2048,
                            ffn_dropout=0.0,
                            operation_order=("self_attn", "norm", "ffn", "norm"),
                        ),
                    ),
                    decoder=dict(
                        type="DINOTransformerDecoder",
                        num_layers=6,
                        return_intermediate=True,
                        transformerlayers=dict(
                            type="DetrTransformerDecoderLayer",
                            attn_cfgs=[
                                dict(type="MultiheadAttention", embed_dims=256, num_heads=8, dropout=0.0),
                                dict(type="MultiScaleDeformableAttention", embed_dims=256, dropout=0.0),
                            ],
                            feedforward_channels=2048,
                            ffn_dropout=0.0,
                            operation_order=("self_attn", "norm", "cross_attn", "norm", "ffn", "norm"),
                        ),
                    ),
                ),
                positional_encoding=dict(
                    type="SinePositionalEncoding", num_feats=128, normalize=True, offset=0.0, temperature=20
                ),
                loss_cls=dict(type="FocalLoss", use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.0),
                loss_bbox=dict(type="L1Loss", loss_weight=5.0),
                loss_iou=dict(type="GIoULoss", loss_weight=2.0),
                dn_cfg=dict(
                    label_noise_scale=0.5,
                    box_noise_scale=1.0,  # 0.4 for DN-DETR
                    group_cfg=dict(dynamic=True, num_groups=None, num_dn_queries=100),
                ),
            ),
        )
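        # NOTE: dn_cfg above configures DINO's denoising training branch: noised
        # copies of the ground-truth labels/boxes are fed as extra queries, with
        # the dynamic group_cfg capping them at num_dn_queries=100 per image.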
        self.bbox_head = build_detector(cfg)

        assigner_cfg = ConfigDict(
            type="HungarianAssigner",
            cls_cost=dict(type="FocalLossCost", weight=1.0),
            reg_cost=dict(type="BBoxL1Cost", weight=5.0, box_format="xywh"),
            iou_cost=dict(type="IoUCost", iou_mode="giou", weight=2.0),
        )
        self.bbox_head.assigner = build_assigner(assigner_cfg)

        test_cfg = dict(max_per_img=300)
        self.bbox_head.test_cfg = test_cfg

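    # NOTE: the fixture builds the head standalone through mmdet's registry, so
    # the assigner and test_cfg that a full detector config would normally wire
    # in are attached by hand above.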
    @e2e_pytest_unit
    def test_forward_train(self):
        inputs = [
            torch.zeros([2, 256, 92, 95]),
            torch.zeros([2, 256, 46, 48]),
            torch.zeros([2, 256, 23, 24]),
            torch.zeros([2, 256, 12, 12]),
        ]
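        # NOTE: the four zero tensors above stand in for multi-scale neck
        # features; their spatial sizes are consistent with strides 8/16/32/64
        # over the (736, 760) batch_input_shape used in img_metas below.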
        gt_bboxes = [
            torch.Tensor(
                [
                    [432.2500, 514.2661, 632.6323, 638.8889],
                    [361.2484, 294.9931, 558.4751, 466.9410],
                    [616.8542, 201.9204, 752.5462, 328.1207],
                    [591.6091, 386.4883, 733.6124, 571.0562],
                    [728.8790, 255.5556, 760.0000, 408.5734],
                    [713.1008, 397.5309, 760.0000, 541.0837],
                    [246.0680, 354.9383, 427.5165, 498.4911],
                    [113.5316, 361.2483, 309.1805, 517.4211],
                    [457.4950, 654.6639, 646.8326, 736.0000],
                    [132.4654, 631.0014, 187.6889, 684.6365],
                    [217.6673, 694.1015, 298.1358, 736.0000],
                    [0.0000, 583.6763, 56.7303, 672.0164],
                    [86.7088, 675.1714, 168.7551, 736.0000],
                    [173.4885, 93.0727, 253.9570, 151.4403],
                    [738.3458, 119.8903, 760.0000, 164.0603],
                    [683.1224, 522.1536, 760.0000, 736.0000],
                ]
            ),
            torch.Tensor(
                [
                    [442.0, 279.0, 544.0, 377.0],
                    [386.0, 1.0, 497.0, 108.0],
                    [288.0, 1.0, 399.0, 84.0],
                    [154.0, 1.0, 268.0, 77.0],
                    [530.0, 163.0, 625.0, 248.0],
                    [179.0, 298.0, 278.0, 398.0],
                    [275.0, 320.0, 374.0, 420.0],
                    [525.0, 394.0, 613.0, 480.0],
                    [332.0, 160.0, 463.0, 286.0],
                    [210.0, 395.0, 308.0, 480.0],
                    [141.0, 395.0, 239.0, 480.0],
                    [106.0, 225.0, 204.0, 310.0],
                    [12.0, 1.0, 148.0, 70.0],
                    [165.0, 79.0, 396.0, 247.0],
                    [483.0, 13.0, 518.0, 52.0],
                ],
            ),
        ]
        gt_labels = [
            torch.Tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 2]).long(),
            torch.Tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0]).long(),
        ]
        img_metas = [
            {
                "flip_direction": "horizontal",
                "img_shape": (736, 760, 3),
                "ori_shape": (480, 640, 3),
                "img_norm_cfg": {
                    "mean": np.array([123.675, 116.28, 103.53], dtype=np.float32),
                    "std": np.array([58.395, 57.12, 57.375], dtype=np.float32),
                    "to_rgb": False,
                },
                "scale_factor": np.array([1.5139443, 1.5144033, 1.5139443, 1.5144033], dtype=np.float32),
                "flip": True,
                "pad_shape": (736, 760, 3),
                "batch_input_shape": (736, 760),
            },
            {
                "flip_direction": "horizontal",
                "img_shape": (480, 640, 3),
                "ori_shape": (480, 640, 3),
                "img_norm_cfg": {
                    "mean": np.array([123.675, 116.28, 103.53], dtype=np.float32),
                    "std": np.array([58.395, 57.12, 57.375], dtype=np.float32),
                    "to_rgb": False,
                },
                "scale_factor": np.array([1.0, 1.0, 1.0, 1.0], dtype=np.float32),
                "flip": True,
                "pad_shape": (480, 640, 3),
                "batch_input_shape": (736, 760),
            },
        ]
        losses = self.bbox_head.forward_train(inputs, img_metas, gt_bboxes, gt_labels)
        assert len(losses) == 39
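
    # NOTE: the 39 loss entries likely decompose as 3 terms (cls/bbox/iou) x 6
    # decoder layers for the matching branch, the same again for the denoising
    # branch, plus 3 encoder (two-stage) losses: 18 + 18 + 3 = 39. This
    # breakdown is inferred from the DINO design, not asserted by the test.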
    @e2e_pytest_unit
    def test_simple_test_bboxes(self):
        feats = [
            torch.zeros([2, 256, 100, 134]),
            torch.zeros([2, 256, 50, 67]),
            torch.zeros([2, 256, 25, 34]),
            torch.zeros([2, 256, 13, 17]),
        ]
        img_metas = [
            {
                "ori_shape": (480, 640, 3),
                "img_shape": (800, 1067, 3),
                "pad_shape": (800, 1067, 3),
                "scale_factor": np.array([1.6671875, 1.6666666, 1.6671875, 1.6666666], dtype=np.float32),
                "flip": False,
                "flip_direction": None,
                "img_norm_cfg": {
                    "mean": np.array([123.675, 116.28, 103.53], dtype=np.float32),
                    "std": np.array([58.395, 57.12, 57.375], dtype=np.float32),
                    "to_rgb": False,
                },
                "batch_input_shape": (800, 1067),
            },
            {
                "ori_shape": (480, 640, 3),
                "img_shape": (800, 1067, 3),
                "pad_shape": (800, 1067, 3),
                "scale_factor": np.array([1.6671875, 1.6666666, 1.6671875, 1.6666666], dtype=np.float32),
                "flip": False,
                "flip_direction": None,
                "img_norm_cfg": {
                    "mean": np.array([123.675, 116.28, 103.53], dtype=np.float32),
                    "std": np.array([58.395, 57.12, 57.375], dtype=np.float32),
                    "to_rgb": False,
                },
                "batch_input_shape": (800, 1067),
            },
        ]
        self.bbox_head.eval()
        results = self.bbox_head.simple_test_bboxes(feats, img_metas)
        assert len(results) == 2
        assert results[0][0].shape == torch.Size([300, 5])
        assert results[0][1].shape == torch.Size([300])
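        # NOTE: the expected shapes follow mmdet's convention: max_per_img=300
        # detections per image as a (300, 5) det_bboxes tensor of
        # (x1, y1, x2, y2, score) plus a matching (300,) tensor of class labels.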
