 import numpy as np
 import torch
 from jde.utils.datasets import letterbox
+from mmpose.structures.bbox import get_warp_matrix
 from torchvision import transforms

-__all__ = ["YOLOXCustomMapper", "JDECustomMapper", "LinearMapper"]
+__all__ = ["MMPOSECustomMapper", "YOLOXCustomMapper", "JDECustomMapper", "LinearMapper"]


 def yolox_style_scaling(img, input_size, padding=False):
@@ -58,6 +59,112 @@ def yolox_style_scaling(img, input_size, padding=False):
     return resized_img


+class MMPOSECustomMapper:
+    """
+    A callable which takes a dataset dict in the CompressAI-Vision generic dataset format,
+    but for MMPose (particularly, the RTMO model) evaluation, and maps it into a format
+    used by the model.
+
+    This is the default callable to be used to map your dataset dict into inference data.
+
+    This callable is based on the preproc function at
+    <https://github.com/open-mmlab/mmpose/blob/dev-1.x/mmpose/datasets/transforms/bottomup_transforms.py>
+
+    The full license statement can be found at
+    <https://github.com/open-mmlab/mmpose?tab=Apache-2.0-1-ov-file#readme>
+    """
+
+    def __init__(
+        self,
+        img_size=[640, 640],
+        size_factor=32,
+        pad_val=[114, 114, 114],
+        aug_transforms=None,
+    ):
+        """
+        Args:
+            img_size: expected input size (Height, Width)
+        """
+
+        self.input_img_size = img_size
+        self.pad_val = pad_val
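+        # both input dimensions must be divisible by `size_factor`
+        # (presumably the model's largest downsampling stride)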
+        assert img_size[0] % size_factor == 0 and img_size[1] % size_factor == 0
+
+        if aug_transforms is not None:
+            self.aug_transforms = aug_transforms
+        else:
+            self.aug_transforms = transforms.Compose([transforms.ToTensor()])
+
+    def compute_scale_and_center(self, src_img_width, src_img_height):
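+        # Fit the source image inside the input canvas while preserving its
+        # aspect ratio: `scale` is the extent, in source-image pixels, that
+        # maps onto the full (input_w, input_h) canvas, so the shorter side
+        # ends up padded; `center` is the source-image center.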
+        _input_h, _input_w = self.input_img_size
+        _ratio = src_img_width / src_img_height
+        _scaled_input_w = min(_input_w, _input_h * _ratio)
+        _scaled_input_h = min(_input_h, _input_w / _ratio)
+
+        center = np.array([src_img_width / 2, src_img_height / 2], dtype=np.float32)
+        scale = np.array(
+            [
+                src_img_width * _input_w / _scaled_input_w,
+                src_img_height * _input_h / _scaled_input_h,
+            ],
+            dtype=np.float32,
+        )
+
+        return scale, center
+
+    def __call__(self, dataset_dict):
+        """
+        Args:
+            dataset_dict (dict): Metadata of one image.
+
+        Returns:
+            dict: a format that compressai-vision pipelines accept
+        """
+
+        dataset_dict = copy.deepcopy(dataset_dict)
+        # the copied dictionary will be modified by the code below
+
+        dataset_dict.pop("annotations", None)
+
+        # tries to replicate the preprocessing of the original implementation
+        # Read image
+        org_img = cv2.imread(dataset_dict["file_name"])  # returns the image in BGR by default
+
+        assert (
+            len(org_img.shape) == 3
+        ), f"detected an input image without 3 channels: {dataset_dict['file_name']}"
+
+        img_h, img_w, _ = org_img.shape
+
+        dataset_dict["height"] = img_h
+        dataset_dict["width"] = img_w
+
+        _input_h, _input_w = self.input_img_size
+        # mmpose-style scaling
+        scale, center = self.compute_scale_and_center(img_w, img_h)
+
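+        # build the 2x3 affine matrix that maps the `scale`-sized window
+        # around `center` onto the input canvas (no rotation)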
+        warp_mat = get_warp_matrix(
+            center=center, scale=scale, rot=0, output_size=(_input_w, _input_h)
+        )
+
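+        # resize and pad in a single warp; regions outside the source image
+        # are filled with `pad_val` (114-grey, as in the original pipeline)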
+        resized_img = cv2.warpAffine(
+            org_img,
+            warp_mat,
+            (_input_w, _input_h),
+            flags=cv2.INTER_LINEAR,
+            borderValue=self.pad_val,
+        )
+
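+        # ToTensor only converts HWC float32 to CHW here (no /255 rescaling
+        # for float inputs); BGR order and the 0-255 range are kept, presumably
+        # to be normalized downstream by the model's own data preprocessor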
+        tensor_image = self.aug_transforms(
+            np.ascontiguousarray(resized_img, dtype=np.float32)
+        )
+
+        dataset_dict["image"] = tensor_image
+
+        return dataset_dict
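+
+
+# A minimal usage sketch for the mapper above (illustrative only; the file
+# path below is hypothetical):
+#
+#     mapper = MMPOSECustomMapper(img_size=[640, 640])
+#     sample = mapper({"file_name": "path/to/image.jpg"})
+#     sample["image"].shape  # -> torch.Size([3, 640, 640])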
+
+
 class YOLOXCustomMapper:
     """
     A callable which takes a dataset dict in CompressAI-Vision generic dataset format, but for YOLOX evaluation,