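Update DEIMv2 augmentation policy epochs; add explain_mode raw logits to DEIM decoder and --num-devices option to perf benchmark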

kprokofi · kprokofi · commit 5063ecbac664 · 2025-11-26T23:50:06.000+09:00
diff --git a/library/src/otx/backend/native/models/detection/backbones/dinov3sta.py b/library/src/otx/backend/native/models/detection/backbones/dinov3sta.py
@@ -182,15 +182,13 @@ class DINOv3STAs(nn.Module):
             "name": "dinov3_vits16plus",
             "weights_path": None,
             "interaction_indexes": [5,8,11],
-            "finetune": True,
             "conv_inplane": 64,
             "hidden_dim": 256
         },
         "deimv2_l" : {
             "name": "dinov3_vits16",
             "weights_path": None,
             "interaction_indexes": [5,8,11],
-            "finetune": True,
             "conv_inplane": 32,
             "hidden_dim": 224,
         },
diff --git a/library/src/otx/backend/native/models/detection/heads/deim_decoder.py b/library/src/otx/backend/native/models/detection/heads/deim_decoder.py
@@ -597,7 +597,7 @@ def _get_decoder_input(self,
             enc_topk_bbox_unact = torch.concat([denoising_bbox_unact, enc_topk_bbox_unact], dim=1)
             content = torch.concat([denoising_logits, content], dim=1)
 
-        return content, enc_topk_bbox_unact, enc_topk_bboxes_list, enc_topk_logits_list
+        return content, enc_topk_bbox_unact, enc_topk_bboxes_list, enc_topk_logits_list, enc_outputs_logits
 
     def _select_topk(self, memory: torch.Tensor, outputs_logits: torch.Tensor, outputs_anchors_unact: torch.Tensor, topk: int):
         if self.query_select_method == 'default':
@@ -623,7 +623,11 @@ def _select_topk(self, memory: torch.Tensor, outputs_logits: torch.Tensor, outpu
 
         return topk_memory, topk_logits, topk_anchors
 
-    def forward(self, feats, targets=None):
+    def forward(self,
+                feats,
+                targets=None,
+                explain_mode: bool = False,
+                ):
         # input projection and embedding
         memory, spatial_shapes = self._get_encoder_input(feats)
 
@@ -641,7 +645,7 @@ def forward(self, feats, targets=None):
         else:
             denoising_logits, denoising_bbox_unact, attn_mask, dn_meta = None, None, None, None
 
-        init_ref_contents, init_ref_points_unact, enc_topk_bboxes_list, enc_topk_logits_list = \
+        init_ref_contents, init_ref_points_unact, enc_topk_bboxes_list, enc_topk_logits_list, enc_outputs_logits = \
             self._get_decoder_input(memory, spatial_shapes, denoising_logits, denoising_bbox_unact)
 
         # decoder
@@ -660,6 +664,8 @@ def forward(self, feats, targets=None):
             attn_mask=attn_mask,
             dn_meta=dn_meta)
 
+        out_bboxes = out_bboxes.clamp(min=1e-8)
+
         if self.training and dn_meta is not None:
             # the output from the first decoder layer, only one
             dn_pre_logits, pre_logits = torch.split(pre_logits, dn_meta['dn_num_split'], dim=1)
@@ -690,6 +696,9 @@ def forward(self, feats, targets=None):
                 out['dn_pre_outputs'] = {'pred_logits': dn_pre_logits, 'pred_boxes': dn_pre_bboxes}
                 out['dn_meta'] = dn_meta
 
+        if explain_mode:
+            out["raw_logits"] = enc_outputs_logits
+
         return out
 
 
diff --git a/library/src/otx/backend/native/models/detection/necks/dfine_hybrid_encoder.py b/library/src/otx/backend/native/models/detection/necks/dfine_hybrid_encoder.py
@@ -857,7 +857,6 @@ class HybridEncoder:
         "deimv2_x": {
             "in_channels": [256, 256, 256],
             "hidden_dim": 256,
-            "feat_strides": [8, 16, 32],
             "dim_feedforward": 1024,
             "expansion": 1.25,
             "depth_mult": 1.37
diff --git a/library/src/otx/recipe/detection/deimv2_l.yaml b/library/src/otx/recipe/detection/deimv2_l.yaml
@@ -56,7 +56,7 @@ callbacks:
       data_aug_switch:
         class_path: otx.backend.native.callbacks.aug_scheduler.DataAugSwitch
         init_args:
-          policy_epochs: [4, 30, 50]
+          policy_epochs: [4, 23, 40]
           policies:
             no_aug:
               to_tv_image: false
diff --git a/library/src/otx/recipe/detection/deimv2_s.yaml b/library/src/otx/recipe/detection/deimv2_s.yaml
@@ -56,7 +56,7 @@ callbacks:
       data_aug_switch:
         class_path: otx.backend.native.callbacks.aug_scheduler.DataAugSwitch
         init_args:
-          policy_epochs: [4, 23, 40]
+          policy_epochs: [4, 40, 70]
           policies:
             no_aug:
               to_tv_image: false
diff --git a/library/tests/perf_v2/benchmark.py b/library/tests/perf_v2/benchmark.py
@@ -127,6 +127,7 @@ def train(
         dataset_info: DatasetInfo,
         sub_work_dir: Path,
         seed: int,
+        num_devices: int = 1,
     ) -> float:
         """Train model with given dataset and return the total time.
 
@@ -145,7 +146,7 @@ def train(
             dataset_info=dataset_info,
             work_dir=sub_work_dir / SubCommand.TRAIN.value,
         )
-
+        engine.num_devices = num_devices
         kwargs = {}
         if dataset_info.extra_overrides:
             kwargs.update(dataset_info.extra_overrides.get("train", {}))
@@ -329,7 +330,7 @@ def _initialize_engine(
             config_path=FOLDER_MAPPINGS[model_info.task] / (model_info.name + ".yaml"),
             data_root=self.data_root / dataset_info.path,
             work_dir=work_dir,
-            device=self.accelerator,
+            device=self.accelerator
         )
 
     def run(
@@ -338,6 +339,7 @@ def run(
         dataset_info: DatasetInfo,
         seed: int,
         criteria: list[Criterion],
+        num_devices: int = 1,
     ) -> pd.DataFrame | None:
         """Run configured benchmark with given dataset and model and return the result.
 
@@ -381,6 +383,7 @@ def run(
                     dataset_info=dataset_info,
                     sub_work_dir=sub_work_dir,
                     seed=seed,
+                    num_devices=num_devices
                 )
 
                 self._log_metrics(
@@ -653,6 +656,7 @@ def check(self, result: pd.DataFrame, criteria: list[Criterion]):
         dataset_info=dataset_info,
         seed=args.seed,
         criteria=criteria,
+        num_devices=args.num_devices,
     )
     benchmark.check(
         result=result,
diff --git a/library/tests/perf_v2/tasks/detection.py b/library/tests/perf_v2/tasks/detection.py
@@ -21,12 +21,13 @@
     # ModelInfo(task=TASK_TYPE.value, name="atss_mobilenetv2", category="default"),
     # ModelInfo(task=TASK_TYPE.value, name="yolox_s", category="speed"),
     # ModelInfo(task=TASK_TYPE.value, name="dfine_x", category="accuracy"),
-    ModelInfo(task=TASK_TYPE.value, name="deim_dfine_x", category="accuracy"),
-    ModelInfo(task=TASK_TYPE.value, name="deim_dfine_l", category="accuracy"),
-    ModelInfo(task=TASK_TYPE.value, name="deim_dfine_m", category="accuracy"),
-    ModelInfo(task=TASK_TYPE.value, name="deimv2_x", category="accuracy"),
-    ModelInfo(task=TASK_TYPE.value, name="deimv2_l", category="accuracy"),
-    ModelInfo(task=TASK_TYPE.value, name="deimv2_m", category="accuracy"),
+    # ModelInfo(task=TASK_TYPE.value, name="deim_dfine_x", category="other"),
+    ModelInfo(task=TASK_TYPE.value, name="deimv2_x", category="other"),
+    # ModelInfo(task=TASK_TYPE.value, name="deim_dfine_l", category="other"),
+    ModelInfo(task=TASK_TYPE.value, name="deimv2_l", category="other"),
+    # ModelInfo(task=TASK_TYPE.value, name="deim_dfine_m", category="other"),
+    ModelInfo(task=TASK_TYPE.value, name="deimv2_m", category="other"),
+    ModelInfo(task=TASK_TYPE.value, name="deimv2_s", category="other"),
     # ModelInfo(task=TASK_TYPE.value, name="atss_resnext101", category="other"),
     # ModelInfo(task=TASK_TYPE.value, name="rtdetr_101", category="other"),
     # ModelInfo(task=TASK_TYPE.value, name="rtdetr_18", category="other"),
@@ -41,31 +42,25 @@
 DATASET_TEST_CASES = (
     [
         DatasetInfo(
-            name=f"pothole_tiny_{idx}",
-            path=Path("detection/pothole_coco_tiny") / f"{idx}",
+            name=f"pothole_tiny_2",
+            path=Path("detection/pothole_coco_tiny/2"),
             group="tiny",
-        )
-        for idx in (1, 2, 3)
-    ]
-    + [
+        ),
         DatasetInfo(
-            name=f"blueberry_tiny_{idx}",
-            path=Path("detection/blueberry_tiny_coco") / f"{idx}",
+            name=f"blueberry_small",
+            path=Path("detection/blue_berry_small"),
             group="tiny",
-        )
-        for idx in (1, 2, 3)
-    ]
-    + [
+        ),
         DatasetInfo(
             name="wgisd_small",
             path=Path("detection/wgisd_merged_coco_small"),
             group="small",
         ),
-        DatasetInfo(
-            name="skindetect",
-            path=Path("detection/skindetect-roboflow"),
-            group="small",
-        ),
+        # DatasetInfo(
+        #     name="skindetect",
+        #     path=Path("detection/skindetect-roboflow"),
+        #     group="small",
+        # ),
         DatasetInfo(
             name="diopsis",
             path=Path("detection/diopsis_coco"),
@@ -76,11 +71,11 @@
             path=Path("detection/bdd_medium"),
             group="medium",
         ),
-        DatasetInfo(
-            name="Vitens-Aeromonas",
-            path=Path("detection/Vitens-Aeromonas-coco"),
-            group="medium",
-        ),
+        # DatasetInfo(
+        #     name="Vitens-Aeromonas",
+        #     path=Path("detection/Vitens-Aeromonas-coco"),
+        #     group="medium",
+        # ),
         DatasetInfo(
             name="visdrone",
             path=Path("detection/visdrone_coco_custom_split"),
diff --git a/library/tests/perf_v2/utils.py b/library/tests/perf_v2/utils.py
@@ -352,4 +352,10 @@ def get_parser() -> ArgumentParser:
         default="gpu",
         help="Which device to use.",
     )
+    parser.add_argument(
+        "--num-devices",
+        type=int,
+        default=1,
+        help="How many devices to use during training.",
+    )
     return parser