
Commit 83309f4

jianwensongfracape authored and committed
[feat] hyperparameter settings in model wrapper
1 parent 740c852 · commit 83309f4

8 files changed: +68 −42 lines

cfgs/vision_model/default.yaml

Lines changed: 7 additions & 6 deletions
@@ -15,6 +15,10 @@ faster_rcnn_X_101_32x8d_FPN_3x:
   weights: "weights/detectron2/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x/139173657/model_final_68b088.pkl"
   integer_conv_weight: False
   splits : "fpn" #, "c2" or "r2"
+  hyper_params:
+    update: False
+    conf_threshold: 0.05
+    max_dets: 100
 
 mask_rcnn_R_50_FPN_3x:
   model_path_prefix: ${..model_root_path}
@@ -48,12 +52,9 @@ jde_1088x608:
   cfg: "models/Towards-Realtime-MOT/cfg/yolov3_1088x608.cfg"
   weights: "weights/jde/jde.1088x608.uncertainty.pt"
   integer_conv_weight: False
-  iou_thres: 0.5
-  conf_thres: 0.5
-  nms_thres: 0.4
-  min_box_area: 200
-  track_buffer: 30
-  frame_rate: 30 # It is odd to consider this at here but following original code.
+  hyper_params:
+    update: False
+    conf_threshold: 0.5
   splits : [36, 61, 74] # MPEG FCM TEST with JDE on TVD
   #splits : [105, 90, 75] # MPEG FCM TEST with JDE on HiEve
 
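
The new hyper_params block is opt-in: nothing changes unless update is set to True. Below is a minimal sketch of that gate, assuming the YAML above has been parsed into a plain dict named cfg (a hypothetical name, not part of this commit):

cfg = {"hyper_params": {"update": False, "conf_threshold": 0.05, "max_dets": 100}}

hp = cfg.get("hyper_params", {})
if hp.get("update", False):
    # Only reached when update: True; keys left out of the YAML fall back to None.
    overrides = {"conf_threshold": hp.get("conf_threshold"), "max_dets": hp.get("max_dets")}
    print("applying", overrides)
else:
    print("hyper_params present but update is False, so model defaults are kept")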

compressai_vision/evaluators/evaluators.py

Lines changed: 1 addition & 3 deletions
@@ -717,9 +717,7 @@ def digest(self, gt, pred, mse_results=None):
         pred_list = []
         for tlwh, id in zip(pred["tlwhs"], pred["ids"]):
             x1, y1, w, h = tlwh
-            if (
-                self.apply_pred_offset
-            ):  # Replicate offset applied in load_motchallenge() in motmetrics library, used in VCM eval framework to load predictions from disk
+            if self.apply_pred_offset:  # Replicate offset applied in load_motchallenge() in motmetrics library, used in VCM eval framework to load predictions from disk
                 x1 -= 1
                 y1 -= 1
                 # x2, y2 = x1 + w, y1 + h
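
The reflowed condition keeps the behaviour unchanged: when apply_pred_offset is set, the predicted top-left corner is shifted by −1 on each axis, mirroring the 1-based to 0-based shift that motmetrics' load_motchallenge() applies when predictions are read from disk. A small illustrative sketch (the box values are made up):

tlwh = (10.0, 20.0, 50.0, 80.0)  # x1, y1, w, h as produced by the tracker
apply_pred_offset = True

x1, y1, w, h = tlwh
if apply_pred_offset:
    # Replicate the -1 offset so in-memory predictions match ones loaded from disk.
    x1 -= 1
    y1 -= 1
print(x1, y1, w, h)  # 9.0 19.0 50.0 80.0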

compressai_vision/model_wrappers/detectron2.py

Lines changed: 17 additions & 2 deletions
@@ -27,11 +27,13 @@
 # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import re
+
 from enum import Enum
 from pathlib import Path
 from typing import Dict, List, Optional
 
 import torch
+
 from detectron2.checkpoint import DetectionCheckpointer
 from detectron2.config import get_cfg
 from detectron2.modeling import build_model
@@ -209,6 +211,15 @@ def __init__(self, device: str, **kwargs):
             zip(self.split_layer_list, [None] * len(self.split_layer_list))
         )
 
+        if kwargs.get("hyper_params", {}).get("update", False):
+            hyper_params = {
+                "conf_threshold": kwargs.get("hyper_params", {}).get(
+                    "conf_threshold", None
+                ),
+                "max_dets": kwargs.get("hyper_params", {}).get("max_dets", None),
+            }
+            self._apply_infer_overrides(hyper_params)
+
         assert self.top_block is not None
         assert self.proposal_generator is not None
 
@@ -313,9 +324,13 @@ def _apply_infer_overrides(self, overrides: Dict):
         """Overrides hyperparameters in roi_heads"""
 
         box_pred = getattr(self.roi_heads, "box_predictor", None)
-        if "conf_threshold" in overrides and hasattr(box_pred, "test_score_thresh"):
+        if overrides.get("conf_threshold") is not None and hasattr(
+            box_pred, "test_score_thresh"
+        ):
             box_pred.test_score_thresh = float(overrides["conf_threshold"])
-        if "max_dets" in overrides and hasattr(box_pred, "test_topk_per_image"):
+        if overrides.get("max_dets") is not None and hasattr(
+            box_pred, "test_topk_per_image"
+        ):
             box_pred.test_topk_per_image = int(overrides["max_dets"])
 
     @torch.no_grad()
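
Read together, the two hunks mean the overrides only take effect when hyper_params.update is True, and each override is applied only if it was provided and the box predictor exposes the matching Detectron2 test-time attribute (test_score_thresh, test_topk_per_image). A condensed, standalone sketch of that logic; the names here are illustrative, not the wrapper's API:

from typing import Dict, Optional

def apply_infer_overrides(box_pred, overrides: Dict[str, Optional[float]]) -> None:
    # Skip overrides that were not provided (None) or attributes the predictor lacks.
    if overrides.get("conf_threshold") is not None and hasattr(box_pred, "test_score_thresh"):
        box_pred.test_score_thresh = float(overrides["conf_threshold"])
    if overrides.get("max_dets") is not None and hasattr(box_pred, "test_topk_per_image"):
        box_pred.test_topk_per_image = int(overrides["max_dets"])

class DummyPredictor:  # stand-in for roi_heads.box_predictor
    test_score_thresh = 0.05
    test_topk_per_image = 100

pred = DummyPredictor()
apply_infer_overrides(pred, {"conf_threshold": 0.3, "max_dets": None})
print(pred.test_score_thresh, pred.test_topk_per_image)  # 0.3 100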

compressai_vision/model_wrappers/jde.py

Lines changed: 19 additions & 11 deletions
@@ -28,11 +28,13 @@
 # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import logging
+
 from pathlib import Path
 from typing import Dict, List, Optional
 
 import jde
 import torch
+
 from jde.models import Darknet
 from jde.tracker import matching
 from jde.tracker.basetrack import TrackState
@@ -78,12 +80,12 @@ def __init__(self, device: str, **kwargs):
         }
 
         self.model_configs = {
-            "iou_thres": float(kwargs["iou_thres"]),
-            "conf_thres": float(kwargs["conf_thres"]),
-            "nms_thres": float(kwargs["nms_thres"]),
-            "min_box_area": int(kwargs["min_box_area"]),
-            "track_buffer": int(kwargs["track_buffer"]),
-            "frame_rate": float(kwargs["frame_rate"]),
+            "iou_thres": float(kwargs.get("iou_thres", 0.5)),
+            "conf_thres": float(kwargs.get("conf_thres", 0.5)),
+            "nms_thres": float(kwargs.get("nms_thres", 0.4)),
+            "min_box_area": int(kwargs.get("min_box_area", 200)),
+            "track_buffer": int(kwargs.get("track_buffer", 30)),
+            "frame_rate": float(kwargs.get("frame_rate", 30)),
         }
         self.max_time_on_hold = int(
             self.model_configs["frame_rate"] / 30.0 * self.model_configs["track_buffer"]
@@ -116,6 +118,15 @@ def __init__(self, device: str, **kwargs):
             self.logger.level = kwargs["logging_level"]
             # logging.DEBUG
 
+        if kwargs.get("hyper_params", {}).get("update", False):
+            hyper_params = {
+                "conf_threshold": kwargs.get("hyper_params", {}).get(
+                    "conf_threshold", None
+                ),
+                "max_dets": kwargs.get("hyper_params", {}).get("max_dets", None),
+            }
+            self._apply_infer_overrides(hyper_params)
+
         # reset member variables to use over a sequence of frame
         self.reset()
 
@@ -210,8 +221,7 @@ def _feature_pyramid_to_output(
         return {"tlwhs": online_tlwhs, "ids": online_ids}
 
     def _apply_infer_overrides(self, overrides: Dict):
-
-        if "conf_threshold" in overrides:
+        if overrides.get("conf_threshold") is not None:
             self.model_configs["conf_thres"] = float(overrides["conf_threshold"])
 
     @torch.no_grad()
@@ -337,9 +347,7 @@ def _jde_process(self, pred, org_img_size: tuple, input_img_size: tuple):
 
         detections = [detections[i] for i in u_detection]
         # detections is now a list of the unmatched detections
-        r_tracked_stracks = (
-            []
-        )  # This is container for stracks which were tracked till the
+        r_tracked_stracks = []  # This is container for stracks which were tracked till the
         # previous frame but no detection was found for it in the current frame
         for i in u_track:
             if track_candidates_pool[i].state == TrackState.Tracked:
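
The JDE change has two parts: the tracker settings that used to be mandatory kwargs now fall back to the defaults previously hard-coded in the YAML, and a hyper_params block with update: True can still override conf_thres afterwards. A plain-dict sketch of how the two interact (no JDE objects involved, values illustrative):

kwargs = {"hyper_params": {"update": True, "conf_threshold": 0.4}}

model_configs = {
    "iou_thres": float(kwargs.get("iou_thres", 0.5)),
    "conf_thres": float(kwargs.get("conf_thres", 0.5)),
    "nms_thres": float(kwargs.get("nms_thres", 0.4)),
    "min_box_area": int(kwargs.get("min_box_area", 200)),
    "track_buffer": int(kwargs.get("track_buffer", 30)),
    "frame_rate": float(kwargs.get("frame_rate", 30)),
}

hp = kwargs.get("hyper_params", {})
if hp.get("update", False) and hp.get("conf_threshold") is not None:
    # Mirrors _apply_infer_overrides: only conf_thres is overridable for JDE here.
    model_configs["conf_thres"] = float(hp["conf_threshold"])

print(model_configs["conf_thres"])  # 0.4: the override wins over the 0.5 default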

compressai_vision/pipelines/base.py

Lines changed: 10 additions & 10 deletions
@@ -31,13 +31,15 @@
 import json
 import logging
 import os
+
 from enum import Enum
 from pathlib import Path
 from typing import Callable, Dict, Tuple
 from uuid import uuid4 as uuid
 
 import torch
 import torch.nn as nn
+
 from omegaconf.errors import InterpolationResolutionError
 from torch import Tensor
 
@@ -180,9 +182,7 @@ def _update_codec_configs_at_pipeline_level(self, total_num_frames):
         if n_frames_to_be_encoded == -1:
             n_frames_to_be_encoded = total_num_frames
 
-        assert (
-            n_frames_to_be_encoded
-        ), f"Number of frames to be encoded must be greater than 0, but got {n_frames_to_be_encoded}"
+        assert n_frames_to_be_encoded, f"Number of frames to be encoded must be greater than 0, but got {n_frames_to_be_encoded}"
 
         if (self._codec_skip_n_frames + n_frames_to_be_encoded) > total_num_frames:
             self.logger.warning(
@@ -200,7 +200,9 @@ def _update_codec_configs_at_pipeline_level(self, total_num_frames):
             self._codec_skip_n_frames > 0
             or self._codec_n_frames_to_be_encoded != total_num_frames
         ):
-            assert self.configs["codec"][
+            assert self.configs[
+                "codec"
+            ][
                 "encode_only"
             ], "Encoding part of a sequence is only available when `codec.encode_only' is True"
 
@@ -220,8 +222,8 @@ def _prep_features_to_dump(features, n_bits, datacatalog_name):
         assert (
             n_bits == 8 or n_bits == 16
         ), "currently it only supports dumping features in 8 bits or 16 bits"
-        assert datacatalog_name in list(
-            MIN_MAX_DATASET.keys()
+        assert (
+            datacatalog_name in list(MIN_MAX_DATASET.keys())
         ), f"{datacatalog_name} does not exist in the pre-computed minimum and maximum tables"
         minv, maxv = MIN_MAX_DATASET[datacatalog_name]
         data_features = {}
@@ -259,8 +261,8 @@ def _post_process_loaded_features(features, n_bits, datacatalog_name):
         assert (
             n_bits == 8 or n_bits == 16
         ), "currently it only supports dumping features in 8 bits or 16 bits"
-        assert datacatalog_name in list(
-            MIN_MAX_DATASET.keys()
+        assert (
+            datacatalog_name in list(MIN_MAX_DATASET.keys())
         ), f"{datacatalog_name} does not exist in the pre-computed minimum and maximum tables"
         minv, maxv = MIN_MAX_DATASET[datacatalog_name]
         data_features = {}
@@ -488,13 +490,11 @@ def calc_feature_mse(
         input_feats: Dict[str, torch.Tensor],
         recon_feats: Dict[str, torch.Tensor],
     ) -> Dict[str, float]:
-
         mse_results: Dict[str, float] = {}
 
         keys_recon = list(recon_feats.keys())
 
         for i, key in enumerate(input_feats.keys()):
-
             x = input_feats[key].cpu()
             y = recon_feats[keys_recon[i]].cpu()
 
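
The calc_feature_mse hunk only drops blank lines, but it shows the shape of the computation: reconstructed feature tensors are matched to the input tensors by position and compared on the CPU. A hedged, self-contained sketch of a per-layer MSE in that spirit (the reduction step is not shown in the diff and is assumed here; key names and shapes are illustrative):

import torch

def feature_mse(input_feats, recon_feats):
    results = {}
    keys_recon = list(recon_feats.keys())
    for i, key in enumerate(input_feats.keys()):
        x = input_feats[key].cpu()
        y = recon_feats[keys_recon[i]].cpu()
        results[key] = torch.mean((x - y) ** 2).item()  # assumed reduction
    return results

feats_in = {"p2": torch.zeros(1, 256, 8, 8)}
feats_rec = {"p2": torch.full((1, 256, 8, 8), 0.1)}
print(feature_mse(feats_in, feats_rec))  # {'p2': ~0.01}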

compressai_vision/pipelines/split_inference/image_split_inference.py

Lines changed: 10 additions & 10 deletions
@@ -28,9 +28,11 @@
 # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import os
+
 from typing import Dict
 
 import torch
+
 from torch.utils.data import DataLoader
 from tqdm import tqdm
 
@@ -210,19 +212,17 @@ def __call__(
             self.update_time_elapsed("nn_part_2", (time_measure() - start))
 
             if evaluator:
-                mse_results = None
-                if (
+                mse_enabled = (
                     evaluator.calculate_feature_mse
                     and not self.configs["codec"]["decode_only"]
-                ):
-                    mse_results = self.calc_feature_mse(
-                        featureT["data"], dec_features["data"]
-                    )
+                )
+                mse_results = (
+                    self.calc_feature_mse(featureT["data"], dec_features["data"])
+                    if mse_enabled
+                    else None
+                )
 
-                if mse_results:
-                    evaluator.digest(d, pred, mse_results)
-                else:
-                    evaluator.digest(d, pred)
+                evaluator.digest(d, pred, mse_results)
 
             if getattr(self, "vis_dir", None) and hasattr(
                 evaluator, "save_visualization"
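
The two digest branches collapse into a single call because digest() already defaults mse_results to None (see the evaluators.py hunk above), so passing None explicitly matches omitting the argument. A toy illustration with a stand-in function that has the same default:

def digest(gt, pred, mse_results=None):  # stand-in, not the real evaluator method
    return gt, pred, mse_results

mse_enabled = False
mse_results = {"p2": 0.01} if mse_enabled else None
assert digest("gt", "pred", mse_results) == digest("gt", "pred")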

compressai_vision/pipelines/split_inference/video_split_inference.py

Lines changed: 2 additions & 0 deletions
@@ -29,10 +29,12 @@
 
 
 import os
+
 from itertools import repeat
 from typing import Dict, List, Tuple, TypeVar
 
 import torch
+
 from torch import Tensor
 from torch.utils.data import DataLoader
 from tqdm import tqdm

compressai_vision/run/eval_split_inference.py

Lines changed: 2 additions & 0 deletions
@@ -45,11 +45,13 @@
 
 import logging
 import os
+
 from pathlib import Path
 from typing import Any
 
 import hydra
 import pandas as pd
+
 from omegaconf import DictConfig
 from tabulate import tabulate
 
