2626
2727
2828class KeypointDetectionModel (ImageModel ):
29- """
30- A wrapper that implements a basic keypoint regression model.
31- """
29+ """A wrapper that implements a basic keypoint regression model."""
3230
3331 __model__ = "keypoint_detection"
3432
35- def __init__ (self , inference_adapter , configuration = dict (), preload = False ):
36- """
37- Initializes the keypoint detection model.
33+ def __init__ (self , inference_adapter , configuration : dict = {}, preload = False ):
34+ """Initializes the keypoint detection model.
3835
3936 Args:
4037 inference_adapter (InferenceAdapter): inference adapter containing the underlying model.
4138 configuration (dict, optional): configuration overrides the model parameters (see parameters() method).
42- Defaults to dict() .
39+ Defaults to {} .
4340 preload (bool, optional): forces inference adapter to load the model. Defaults to False.
4441 """
4542 super ().__init__ (inference_adapter , configuration , preload )
4643 self ._check_io_number (1 , 2 )
44+ self .apply_softmax : bool
4745
4846 def postprocess (
49- self , outputs : dict [str , np .ndarray ], meta : dict [str , Any ]
47+ self ,
48+ outputs : dict [str , np .ndarray ],
49+ meta : dict [str , Any ],
5050 ) -> DetectedKeypoints :
51- """
52- Applies SCC decoded to the model outputs.
51+ """Applies SimCC decoding to the model outputs.
5352
5453 Args:
5554 outputs (dict[str, np.ndarray]): raw outputs of the model
@@ -60,12 +59,26 @@ def postprocess(
6059 """
6160 encoded_kps = list (outputs .values ())
6261 batch_keypoints , batch_scores = _decode_simcc (
63- * encoded_kps , apply_softmax = self .apply_softmax
62+ encoded_kps [0 ],
63+ encoded_kps [1 ],
64+ apply_softmax = self .apply_softmax ,
6465 )
6566 orig_h , orig_w = meta ["original_shape" ][:2 ]
6667 kp_scale_h = orig_h / self .h
6768 kp_scale_w = orig_w / self .w
68- batch_keypoints = batch_keypoints .squeeze () * np .array ([kp_scale_w , kp_scale_h ])
69+
70+ batch_keypoints = batch_keypoints .squeeze ()
71+
72+ if self .resize_type in ["fit_to_window" , "fit_to_window_letterbox" ]:
73+ inverted_scale = max (kp_scale_h , kp_scale_w )
74+ kp_scale_h = kp_scale_w = inverted_scale
75+ if self .resize_type == "fit_to_window_letterbox" :
76+ pad_left = (self .w - round (orig_w / inverted_scale )) // 2
77+ pad_top = (self .h - round (orig_h / inverted_scale )) // 2
78+ batch_keypoints -= np .array ([pad_left , pad_top ])
79+
80+ batch_keypoints *= np .array ([kp_scale_w , kp_scale_h ])
81+
6982 return DetectedKeypoints (batch_keypoints , batch_scores .squeeze ())
7083
7184 @classmethod
@@ -74,13 +87,15 @@ def parameters(cls) -> dict:
7487 parameters .update (
7588 {
7689 "labels" : ListValue (
77- description = "List of class labels" , value_type = str , default_value = []
90+ description = "List of class labels" ,
91+ value_type = str ,
92+ default_value = [],
7893 ),
7994 "apply_softmax" : BooleanValue (
8095 default_value = True ,
8196 description = "Whether to apply softmax on the heatmap." ,
8297 ),
83- }
98+ },
8499 )
85100 return parameters
86101
@@ -137,23 +152,33 @@ def _decode_simcc(
137152 simcc_y : np .ndarray ,
138153 simcc_split_ratio : float = 2.0 ,
139154 apply_softmax : bool = False ,
155+ decode_beta : float = 150.0 ,
156+ sigma : float | int = 6.0 ,
140157) -> tuple [np .ndarray , np .ndarray ]:
141158 """Decodes keypoint coordinates from SimCC representations. The decoded coordinates are in the input image space.
142159
143160 Args:
144161 simcc_x (np.ndarray): SimCC label for x-axis
145162 simcc_y (np.ndarray): SimCC label for y-axis
146163 simcc_split_ratio (float): The ratio of the label size to the input size.
147- apply_softmax (bool): whether to apply softmax on the heatmap .
164+ apply_softmax (bool): whether to apply softmax during scores generation .
148165 Defaults to False.
166+ decode_beta (float): The beta value for decoding scores with softmax. Defaults
167+ to 150.0.
168+ sigma (float | int): The sigma value in the Gaussian SimCC
169+ label. Defaults to 6.0.
149170
150171 Returns:
151172 tuple:
152173 - keypoints (np.ndarray): Decoded coordinates in shape (N, K, D)
153174 - scores (np.ndarray): The keypoint scores in shape (N, K).
154175 It usually represents the confidence of the keypoint prediction
155176 """
156- keypoints , scores = _get_simcc_maximum (simcc_x , simcc_y , apply_softmax )
177+ keypoints , scores = _get_simcc_maximum (simcc_x , simcc_y )
178+ if apply_softmax :
179+ _ , scores = _get_simcc_maximum (
180+ decode_beta * sigma * simcc_x , decode_beta * sigma * simcc_y , apply_softmax
181+ )
157182
158183 # Unsqueeze the instance dimension for single-instance results
159184 if keypoints .ndim == 2 :
@@ -169,6 +194,7 @@ def _get_simcc_maximum(
169194 simcc_x : np .ndarray ,
170195 simcc_y : np .ndarray ,
171196 apply_softmax : bool = False ,
197+ softmax_eps : float = 1e-06 ,
172198) -> tuple [np .ndarray , np .ndarray ]:
173199 """Get maximum response location and value from simcc representations.
174200
@@ -183,6 +209,8 @@ def _get_simcc_maximum(
183209 simcc_y (np.ndarray): y-axis SimCC in shape (K, Hy) or (N, K, Hy)
184210 apply_softmax (bool): whether to apply softmax on the heatmap.
185211 Defaults to False.
212+ softmax_eps (float): a constant to avoid division by zero in softmax.
213+ Defaults to 1e-6.
186214
187215 Returns:
188216 tuple:
@@ -212,17 +240,21 @@ def _get_simcc_maximum(
212240 simcc_x = simcc_x - np .max (simcc_x , axis = 1 , keepdims = True )
213241 simcc_y = simcc_y - np .max (simcc_y , axis = 1 , keepdims = True )
214242 ex , ey = np .exp (simcc_x ), np .exp (simcc_y )
215- simcc_x = ex / np .sum (ex , axis = 1 , keepdims = True )
216- simcc_y = ey / np .sum (ey , axis = 1 , keepdims = True )
243+ simcc_x = ex / ( np .sum (ex , axis = 1 , keepdims = True ) + softmax_eps )
244+ simcc_y = ey / ( np .sum (ey , axis = 1 , keepdims = True ) + softmax_eps )
217245
218246 x_locs = np .argmax (simcc_x , axis = 1 )
219247 y_locs = np .argmax (simcc_y , axis = 1 )
220248 locs = np .stack ((x_locs , y_locs ), axis = - 1 ).astype (np .float32 )
221249 max_val_x = np .take_along_axis (
222- simcc_x , np .expand_dims (x_locs , axis = - 1 ), axis = - 1
250+ simcc_x ,
251+ np .expand_dims (x_locs , axis = - 1 ),
252+ axis = - 1 ,
223253 ).squeeze (axis = - 1 )
224254 max_val_y = np .take_along_axis (
225- simcc_y , np .expand_dims (y_locs , axis = - 1 ), axis = - 1
255+ simcc_y ,
256+ np .expand_dims (y_locs , axis = - 1 ),
257+ axis = - 1 ,
226258 ).squeeze (axis = - 1 )
227259
228260 mask = max_val_x > max_val_y
0 commit comments