Skip to content

Commit bfece5d

Browse files
authored
Fix input size configuration during validation for DFine model (#4666)
1 parent c94476f commit bfece5d

File tree

2 files changed: +6 −12 lines changed

lib/src/otx/backend/native/models/detection/d_fine.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -92,6 +92,7 @@ def _create_model(self, num_classes: int | None = None) -> DETR:
9292
decoder = DFINETransformer(
9393
model_name=self.model_name,
9494
num_classes=num_classes,
95+
eval_spatial_size=self.data_input_params.input_size,
9596
)
9697
criterion = DFINECriterion(
9798
weight_dict={

lib/src/otx/backend/native/models/detection/heads/dfine_decoder.py

Lines changed: 5 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -431,7 +431,7 @@ def __init__(
431431
num_denoising: int = 100,
432432
label_noise_ratio: float = 0.5,
433433
box_noise_scale: float = 1.0,
434-
eval_spatial_size: list[int] = [640, 640], # noqa: B006
434+
eval_spatial_size: tuple[int, int] = (640, 640),
435435
eval_idx: int = -1,
436436
reg_scale: float = 4.0,
437437
reg_max: int = 32,
@@ -693,7 +693,6 @@ def _get_decoder_input(
693693

694694
if memory.shape[0] > 1:
695695
anchors = anchors.repeat(memory.shape[0], 1, 1)
696-
697696
memory = valid_mask.to(memory.dtype) * memory
698697

699698
output_memory = self.enc_output(memory)
@@ -933,26 +932,22 @@ class DFINETransformer:
933932
"num_decoder_layers": 3,
934933
"eval_idx": -1,
935934
"num_points_list": [6, 6],
936-
"eval_spatial_size": [640, 640],
937935
},
938936
"dfine_hgnetv2_s": {
939937
"feat_channels": [256, 256, 256],
940938
"num_decoder_layers": 3,
941939
"eval_idx": -1,
942-
"eval_spatial_size": [640, 640],
943940
"num_points_list": [3, 6, 3],
944941
},
945942
"dfine_hgnetv2_m": {
946943
"num_decoder_layers": 4,
947944
"eval_idx": -1,
948-
"eval_spatial_size": [640, 640],
949945
},
950946
"dfine_hgnetv2_l": {},
951947
"dfine_hgnetv2_x": {
952948
"feat_channels": [384, 384, 384],
953949
"reg_scale": 8.0,
954950
"eval_idx": -1,
955-
"eval_spatial_size": [640, 640],
956951
},
957952
"deim_dfine_hgnetv2_n": {
958953
"feat_channels": [128, 128],
@@ -963,21 +958,18 @@ class DFINETransformer:
963958
"num_decoder_layers": 3,
964959
"eval_idx": -1,
965960
"num_points_list": [6, 6],
966-
"eval_spatial_size": [640, 640],
967961
"activation": nn.SiLU,
968962
},
969963
"deim_dfine_hgnetv2_s": {
970964
"feat_channels": [256, 256, 256],
971965
"num_decoder_layers": 3,
972966
"eval_idx": -1,
973-
"eval_spatial_size": [640, 640],
974967
"num_points_list": [3, 6, 3],
975968
"activation": nn.SiLU,
976969
},
977970
"deim_dfine_hgnetv2_m": {
978971
"num_decoder_layers": 4,
979972
"eval_idx": -1,
980-
"eval_spatial_size": [640, 640],
981973
"activation": nn.SiLU,
982974
},
983975
"deim_dfine_hgnetv2_l": {
@@ -987,12 +979,13 @@ class DFINETransformer:
987979
"feat_channels": [384, 384, 384],
988980
"reg_scale": 8.0,
989981
"eval_idx": -1,
990-
"eval_spatial_size": [640, 640],
991982
"activation": nn.SiLU,
992983
},
993984
}
994985

995-
def __new__(cls, model_name: str, num_classes: int) -> DFINETransformerModule:
986+
def __new__(
987+
cls, model_name: str, num_classes: int, eval_spatial_size: tuple[int, int] = (640, 640)
988+
) -> DFINETransformerModule:
996989
"""Constructor for DFINETransformerModule."""
997990
cfg = cls.decoder_cfg[model_name]
998-
return DFINETransformerModule(num_classes=num_classes, **cfg)
991+
return DFINETransformerModule(num_classes=num_classes, eval_spatial_size=eval_spatial_size, **cfg)

0 commit comments

Comments
 (0)