@@ -431,7 +431,7 @@ def __init__(
431431 num_denoising : int = 100 ,
432432 label_noise_ratio : float = 0.5 ,
433433 box_noise_scale : float = 1.0 ,
434- eval_spatial_size : list [int ] = [ 640 , 640 ], # noqa: B006
434+ eval_spatial_size : tuple [int , int ] = ( 640 , 640 ),
435435 eval_idx : int = - 1 ,
436436 reg_scale : float = 4.0 ,
437437 reg_max : int = 32 ,
@@ -693,7 +693,6 @@ def _get_decoder_input(
693693
694694 if memory .shape [0 ] > 1 :
695695 anchors = anchors .repeat (memory .shape [0 ], 1 , 1 )
696-
697696 memory = valid_mask .to (memory .dtype ) * memory
698697
699698 output_memory = self .enc_output (memory )
@@ -933,26 +932,22 @@ class DFINETransformer:
933932 "num_decoder_layers" : 3 ,
934933 "eval_idx" : - 1 ,
935934 "num_points_list" : [6 , 6 ],
936- "eval_spatial_size" : [640 , 640 ],
937935 },
938936 "dfine_hgnetv2_s" : {
939937 "feat_channels" : [256 , 256 , 256 ],
940938 "num_decoder_layers" : 3 ,
941939 "eval_idx" : - 1 ,
942- "eval_spatial_size" : [640 , 640 ],
943940 "num_points_list" : [3 , 6 , 3 ],
944941 },
945942 "dfine_hgnetv2_m" : {
946943 "num_decoder_layers" : 4 ,
947944 "eval_idx" : - 1 ,
948- "eval_spatial_size" : [640 , 640 ],
949945 },
950946 "dfine_hgnetv2_l" : {},
951947 "dfine_hgnetv2_x" : {
952948 "feat_channels" : [384 , 384 , 384 ],
953949 "reg_scale" : 8.0 ,
954950 "eval_idx" : - 1 ,
955- "eval_spatial_size" : [640 , 640 ],
956951 },
957952 "deim_dfine_hgnetv2_n" : {
958953 "feat_channels" : [128 , 128 ],
@@ -963,21 +958,18 @@ class DFINETransformer:
963958 "num_decoder_layers" : 3 ,
964959 "eval_idx" : - 1 ,
965960 "num_points_list" : [6 , 6 ],
966- "eval_spatial_size" : [640 , 640 ],
967961 "activation" : nn .SiLU ,
968962 },
969963 "deim_dfine_hgnetv2_s" : {
970964 "feat_channels" : [256 , 256 , 256 ],
971965 "num_decoder_layers" : 3 ,
972966 "eval_idx" : - 1 ,
973- "eval_spatial_size" : [640 , 640 ],
974967 "num_points_list" : [3 , 6 , 3 ],
975968 "activation" : nn .SiLU ,
976969 },
977970 "deim_dfine_hgnetv2_m" : {
978971 "num_decoder_layers" : 4 ,
979972 "eval_idx" : - 1 ,
980- "eval_spatial_size" : [640 , 640 ],
981973 "activation" : nn .SiLU ,
982974 },
983975 "deim_dfine_hgnetv2_l" : {
@@ -987,12 +979,13 @@ class DFINETransformer:
987979 "feat_channels" : [384 , 384 , 384 ],
988980 "reg_scale" : 8.0 ,
989981 "eval_idx" : - 1 ,
990- "eval_spatial_size" : [640 , 640 ],
991982 "activation" : nn .SiLU ,
992983 },
993984 }
994985
def __new__(
    cls,
    model_name: str,
    num_classes: int,
    eval_spatial_size: tuple[int, int] = (640, 640),
) -> DFINETransformerModule:
    """Build a ``DFINETransformerModule`` from a named preset.

    This class acts as a factory: calling it does not return an instance of
    the class itself but a ``DFINETransformerModule`` configured from
    ``cls.decoder_cfg[model_name]``.

    Args:
        model_name: Key into ``cls.decoder_cfg`` selecting the preset.
        num_classes: Forwarded unchanged to ``DFINETransformerModule``.
        eval_spatial_size: Forwarded unchanged to ``DFINETransformerModule``.
            Presumably the evaluation input resolution; the axis order is
            not visible here, so confirm against the module.

    Returns:
        The constructed ``DFINETransformerModule``.

    Raises:
        KeyError: If ``model_name`` is not a key of ``cls.decoder_cfg``.
    """
    try:
        cfg = cls.decoder_cfg[model_name]
    except KeyError:
        # Same exception type as a plain dict lookup, but list the valid
        # preset names so a typo is easy to diagnose.
        available = ", ".join(sorted(cls.decoder_cfg))
        msg = f"Unknown model_name {model_name!r}; expected one of: {available}"
        raise KeyError(msg) from None

    # The preset supplies the per-variant keyword arguments; the two
    # caller-controlled values are passed explicitly alongside them.
    return DFINETransformerModule(
        num_classes=num_classes,
        eval_spatial_size=eval_spatial_size,
        **cfg,
    )
0 commit comments