 import numpy as np
 import torch
 from jde.utils.datasets import letterbox
+from mmpose.structures.bbox import get_warp_matrix
 from torchvision import transforms

-__all__ = ["YOLOXCustomMapper", "JDECustomMapper", "LinearMapper"]
+__all__ = ["MMPOSECustomMapper", "YOLOXCustomMapper", "JDECustomMapper", "LinearMapper"]


 def yolox_style_scaling(img, input_size, padding=False):
@@ -58,6 +59,112 @@ def yolox_style_scaling(img, input_size, padding=False):
     return resized_img


+class MMPOSECustomMapper:
+    """
+    A callable which takes a dataset dict in the CompressAI-Vision generic dataset format,
+    but for MMPose (particularly, the RTMO model) evaluation, and maps it into a format
+    used by the model.
+
+    This is the default callable to be used to map your dataset dict into inference data.
+
+    This callable is based on the preproc function at
+    <https://github.com/open-mmlab/mmpose/blob/dev-1.x/mmpose/datasets/transforms/bottomup_transforms.py>
+
+    The full license statement can be found at
+    <https://github.com/open-mmlab/mmpose?tab=Apache-2.0-1-ov-file#readme>
+    """
+
+    def __init__(
+        self,
+        img_size=[640, 640],
+        size_factor=32,
+        pad_val=[114, 114, 114],
+        aug_transforms=None,
+    ):
+        """
+        Args:
+            img_size: expected input size (Height, Width)
+        """
+
+        self.input_img_size = img_size
+        self.pad_val = pad_val
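+        # both input dimensions must be divisible by `size_factor`
+        # (presumably the model's largest downsampling stride)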
+        assert img_size[0] % size_factor == 0 and img_size[1] % size_factor == 0
+
+        if aug_transforms is not None:
+            self.aug_transforms = aug_transforms
+        else:
+            self.aug_transforms = transforms.Compose([transforms.ToTensor()])
+
+    def compute_scale_and_center(self, src_img_width, src_img_height):
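+        # Fit the source image inside the input canvas while preserving its
+        # aspect ratio: `scale` is the extent, in source-image pixels, that
+        # maps onto the full (input_w, input_h) canvas, so the shorter side
+        # ends up padded; `center` is the source-image center.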
+        _input_h, _input_w = self.input_img_size
+        _ratio = src_img_width / src_img_height
+        _scaled_input_w = min(_input_w, _input_h * _ratio)
+        _scaled_input_h = min(_input_h, _input_w / _ratio)
+
+        center = np.array([src_img_width / 2, src_img_height / 2], dtype=np.float32)
+        scale = np.array(
+            [
+                src_img_width * _input_w / _scaled_input_w,
+                src_img_height * _input_h / _scaled_input_h,
+            ],
+            dtype=np.float32,
+        )
+
+        return scale, center
+
+    def __call__(self, dataset_dict):
+        """
+        Args:
+            dataset_dict (dict): Metadata of one image.
+
+        Returns:
+            dict: a format that compressai-vision pipelines accept
+        """
+
+        dataset_dict = copy.deepcopy(dataset_dict)
+        # the copied dictionary will be modified by the code below
+
+        dataset_dict.pop("annotations", None)
+
+        # tries to replicate the preprocessing of the original implementation
+        # Read image
+        org_img = cv2.imread(dataset_dict["file_name"])  # returns the image in BGR by default
+
+        assert (
+            len(org_img.shape) == 3
+        ), f"detected an input image without 3 channels: {dataset_dict['file_name']}"
+
+        img_h, img_w, _ = org_img.shape
+
+        dataset_dict["height"] = img_h
+        dataset_dict["width"] = img_w
+
+        _input_h, _input_w = self.input_img_size
+        # mmpose-style scaling
+        scale, center = self.compute_scale_and_center(img_w, img_h)
+
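+        # build the 2x3 affine matrix that maps the `scale`-sized window
+        # around `center` onto the input canvas (no rotation)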
+        warp_mat = get_warp_matrix(
+            center=center, scale=scale, rot=0, output_size=(_input_w, _input_h)
+        )
+
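+        # resize and pad in a single warp; regions outside the source image
+        # are filled with `pad_val` (114-grey, as in the original pipeline)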
+        resized_img = cv2.warpAffine(
+            org_img,
+            warp_mat,
+            (_input_w, _input_h),
+            flags=cv2.INTER_LINEAR,
+            borderValue=self.pad_val,
+        )
+
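+        # ToTensor only converts HWC float32 to CHW here (no /255 rescaling
+        # for float inputs); BGR order and the 0-255 range are kept, presumably
+        # to be normalized downstream by the model's own data preprocessor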
+        tensor_image = self.aug_transforms(
+            np.ascontiguousarray(resized_img, dtype=np.float32)
+        )
+
+        dataset_dict["image"] = tensor_image
+
+        return dataset_dict
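+
+
+# A minimal usage sketch for the mapper above (illustrative only; the file
+# path below is hypothetical):
+#
+#     mapper = MMPOSECustomMapper(img_size=[640, 640])
+#     sample = mapper({"file_name": "path/to/image.jpg"})
+#     sample["image"].shape  # -> torch.Size([3, 640, 640])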
+
+
 class YOLOXCustomMapper:
     """
     A callable which takes a dataset dict in CompressAI-Vision generic dataset format, but for YOLOX evaluation,