Skip to content

Commit 29069c5

Browse files
committed
Add softmax to kp postprocessing
1 parent 3b37363 commit 29069c5

File tree

6 files changed

+77
-14
lines changed

6 files changed

+77
-14
lines changed

src/cpp/models/include/models/keypoint_detection.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2020-2024 Intel Corporation
2+
* Copyright (C) 2020-2025 Intel Corporation
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

@@ -38,6 +38,8 @@ class KeypointDetectionModel : public ImageModel {
3838
static std::string ModelType;
3939

4040
protected:
41+
bool apply_softmax = true;
42+
4143
void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
4244
void updateModelInfo() override;
4345
void init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority);

src/cpp/models/include/models/results.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#pragma once
77
#include <map>
88
#include <memory>
9+
#include <numeric>
910
#include <opencv2/core.hpp>
1011
#include <opencv2/imgproc.hpp>
1112
#include <openvino/openvino.hpp>
@@ -356,8 +357,11 @@ struct DetectedKeypoints {
356357
for (const cv::Point2f& keypoint : prediction.keypoints) {
357358
kp_x_sum += keypoint.x;
358359
}
360+
float scores_sum = std::accumulate(prediction.scores.begin(), prediction.scores.end(), 0.f);
361+
359362
os << "keypoints: (" << prediction.keypoints.size() << ", 2), keypoints_x_sum: ";
360-
os << std::fixed << std::setprecision(3) << kp_x_sum << ", scores: (" << prediction.scores.size() << ",)";
363+
os << std::fixed << std::setprecision(3) << kp_x_sum << ", scores: (" << prediction.scores.size()
364+
<< ",) " << std::fixed << std::setprecision(3) << scores_sum;
361365
return os;
362366
}
363367

src/cpp/models/src/keypoint_detection.cpp

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2020-2024 Intel Corporation
2+
* Copyright (C) 2020-2025 Intel Corporation
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

@@ -22,7 +22,7 @@ void colArgMax(const cv::Mat& src, cv::Mat& dst_locs, cv::Mat& dst_values) {
2222
dst_locs = cv::Mat::zeros(src.rows, 1, CV_32S);
2323
dst_values = cv::Mat::zeros(src.rows, 1, CV_32F);
2424

25-
for (int row = 0; row < src.rows; row++) {
25+
for (int row = 0; row < src.rows; ++row) {
2626
const float* ptr_row = src.ptr<float>(row);
2727
int max_val_idx = 0;
2828
dst_values.at<float>(row) = ptr_row[max_val_idx];
@@ -36,10 +36,43 @@ void colArgMax(const cv::Mat& src, cv::Mat& dst_locs, cv::Mat& dst_values) {
3636
}
3737
}
3838

39-
DetectedKeypoints decode_simcc(const cv::Mat& simcc_x,
40-
const cv::Mat& simcc_y,
39+
// Row-wise, numerically stable softmax.
//
// Works on a deep copy of `src` (the input is never modified) and returns that
// copy with every row replaced by exp(x - row_max) / sum(exp(x - row_max)).
// Subtracting the row maximum first keeps the largest exponent at 0, so the
// exponentials cannot overflow for large logits.
//
// An empty matrix is returned unchanged.
//
// NOTE(review): rows are accessed through ptr<float>(), so `src` is assumed to
// be a single-channel CV_32F matrix - confirm at the call sites.
cv::Mat softmax_row(const cv::Mat& src) {
    cv::Mat result = src.clone();

    // Nothing to normalise; also prevents reading ptr_row[0] of a zero-width row.
    if (result.empty()) {
        return result;
    }

    for (int row = 0; row < result.rows; ++row) {
        float* ptr_row = result.ptr<float>(row);

        // Pass 1: find the row maximum used as the stabilising shift.
        float max_val = ptr_row[0];
        for (int col = 1; col < result.cols; ++col) {
            if (ptr_row[col] > max_val) {
                max_val = ptr_row[col];
            }
        }

        // Pass 2: exponentiate in place and accumulate the normaliser.
        // std::exp selects the float overload, avoiding a float->double->float round trip.
        float sum = 0.0f;
        for (int col = 0; col < result.cols; ++col) {
            ptr_row[col] = std::exp(ptr_row[col] - max_val);
            sum += ptr_row[col];
        }

        // Pass 3: normalise. For finite inputs the maximum element contributes
        // exp(0) == 1, so `sum` is at least 1 and the division is safe.
        for (int col = 0; col < result.cols; ++col) {
            ptr_row[col] /= sum;
        }
    }

    return result;
}
62+
63+
DetectedKeypoints decode_simcc(const cv::Mat& simcc_x_input,
64+
const cv::Mat& simcc_y_input,
4165
const cv::Point2f& extra_scale = cv::Point2f(1.f, 1.f),
42-
float simcc_split_ratio = 2.0f) {
66+
float simcc_split_ratio = 2.0f,
67+
bool apply_softmax = false) {
68+
cv::Mat simcc_x = simcc_x_input;
69+
cv::Mat simcc_y = simcc_y_input;
70+
71+
if (apply_softmax) {
    // Normalise each axis independently. The y distribution must be written
    // back to simcc_y; assigning it to simcc_x would discard the x softmax
    // and leave simcc_y as raw logits.
    simcc_x = softmax_row(simcc_x);
    simcc_y = softmax_row(simcc_y);
}
75+
4376
cv::Mat x_locs, max_val_x;
4477
colArgMax(simcc_x, x_locs, max_val_x);
4578

@@ -67,6 +100,7 @@ std::string KeypointDetectionModel::ModelType = "keypoint_detection";
67100

68101
void KeypointDetectionModel::init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority) {
69102
labels = get_from_any_maps("labels", top_priority, mid_priority, labels);
103+
apply_softmax = get_from_any_maps("apply_softmax", top_priority, mid_priority, apply_softmax);
70104
}
71105

72106
KeypointDetectionModel::KeypointDetectionModel(std::shared_ptr<ov::Model>& model, const ov::AnyMap& configuration)
@@ -204,7 +238,8 @@ std::unique_ptr<ResultBase> KeypointDetectionModel::postprocess(InferenceResult&
204238
float inverted_scale_x = static_cast<float>(image_data.inputImgWidth) / netInputWidth,
205239
inverted_scale_y = static_cast<float>(image_data.inputImgHeight) / netInputHeight;
206240

207-
result->poses.emplace_back(decode_simcc(pred_x_mat, pred_y_mat, {inverted_scale_x, inverted_scale_y}));
241+
result->poses.emplace_back(
242+
decode_simcc(pred_x_mat, pred_y_mat, {inverted_scale_x, inverted_scale_y}, apply_softmax));
208243
return std::unique_ptr<ResultBase>(result);
209244
}
210245

src/python/model_api/models/keypoint_detection.py

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright (C) 2020-2024 Intel Corporation
2+
# Copyright (C) 2020-2025 Intel Corporation
33
# SPDX-License-Identifier: Apache-2.0
44
#
55

@@ -11,7 +11,7 @@
1111

1212
from .image_model import ImageModel
1313
from .result import DetectedKeypoints, DetectionResult
14-
from .types import ListValue
14+
from .types import BooleanValue, ListValue
1515

1616

1717
class KeypointDetectionModel(ImageModel):
@@ -30,6 +30,7 @@ def __init__(self, inference_adapter, configuration: dict = {}, preload=False):
3030
"""
3131
super().__init__(inference_adapter, configuration, preload)
3232
self._check_io_number(1, 2)
33+
self.apply_softmax: bool
3334

3435
def postprocess(
3536
self,
@@ -46,7 +47,11 @@ def postprocess(
4647
DetectedKeypoints: detected keypoints
4748
"""
4849
encoded_kps = list(outputs.values())
49-
batch_keypoints, batch_scores = _decode_simcc(*encoded_kps)
50+
batch_keypoints, batch_scores = _decode_simcc(
51+
encoded_kps[0],
52+
encoded_kps[1],
53+
apply_softmax=self.apply_softmax,
54+
)
5055
orig_h, orig_w = meta["original_shape"][:2]
5156
kp_scale_h = orig_h / self.h
5257
kp_scale_w = orig_w / self.w
@@ -63,6 +68,10 @@ def parameters(cls) -> dict:
6368
value_type=str,
6469
default_value=[],
6570
),
71+
"apply_softmax": BooleanValue(
72+
default_value=True,
73+
description="Whether to apply softmax on the heatmap.",
74+
),
6675
},
6776
)
6877
return parameters
@@ -119,21 +128,24 @@ def _decode_simcc(
119128
simcc_x: np.ndarray,
120129
simcc_y: np.ndarray,
121130
simcc_split_ratio: float = 2.0,
131+
apply_softmax: bool = False,
122132
) -> tuple[np.ndarray, np.ndarray]:
123133
"""Decodes keypoint coordinates from SimCC representations. The decoded coordinates are in the input image space.
124134
125135
Args:
126136
simcc_x (np.ndarray): SimCC label for x-axis
127137
simcc_y (np.ndarray): SimCC label for y-axis
128138
simcc_split_ratio (float): The ratio of the label size to the input size.
139+
apply_softmax (bool): whether to apply softmax on the heatmap.
140+
Defaults to False.
129141
130142
Returns:
131143
tuple:
132144
- keypoints (np.ndarray): Decoded coordinates in shape (N, K, D)
133145
- scores (np.ndarray): The keypoint scores in shape (N, K).
134146
It usually represents the confidence of the keypoint prediction
135147
"""
136-
keypoints, scores = _get_simcc_maximum(simcc_x, simcc_y)
148+
keypoints, scores = _get_simcc_maximum(simcc_x, simcc_y, apply_softmax)
137149

138150
# Unsqueeze the instance dimension for single-instance results
139151
if keypoints.ndim == 2:
@@ -148,6 +160,7 @@ def _decode_simcc(
148160
def _get_simcc_maximum(
149161
simcc_x: np.ndarray,
150162
simcc_y: np.ndarray,
163+
apply_softmax: bool = False,
151164
) -> tuple[np.ndarray, np.ndarray]:
152165
"""Get maximum response location and value from simcc representations.
153166
@@ -160,6 +173,8 @@ def _get_simcc_maximum(
160173
Args:
161174
simcc_x (np.ndarray): x-axis SimCC in shape (K, Wx) or (N, K, Wx)
162175
simcc_y (np.ndarray): y-axis SimCC in shape (K, Hy) or (N, K, Hy)
176+
apply_softmax (bool): whether to apply softmax on the heatmap.
177+
Defaults to False.
163178
164179
Returns:
165180
tuple:
@@ -185,6 +200,13 @@ def _get_simcc_maximum(
185200
else:
186201
batch_size = None
187202

203+
if apply_softmax:
204+
simcc_x = simcc_x - np.max(simcc_x, axis=1, keepdims=True)
205+
simcc_y = simcc_y - np.max(simcc_y, axis=1, keepdims=True)
206+
ex, ey = np.exp(simcc_x), np.exp(simcc_y)
207+
simcc_x = ex / np.sum(ex, axis=1, keepdims=True)
208+
simcc_y = ey / np.sum(ey, axis=1, keepdims=True)
209+
188210
x_locs = np.argmax(simcc_x, axis=1)
189211
y_locs = np.argmax(simcc_y, axis=1)
190212
locs = np.stack((x_locs, y_locs), axis=-1).astype(np.float32)

src/python/model_api/models/result/keypoint.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,5 @@ def __str__(self):
1717
return (
1818
f"keypoints: {self.keypoints.shape}, "
1919
f"keypoints_x_sum: {np.sum(self.keypoints[:, :1]):.3f}, "
20-
f"scores: {self.scores.shape}"
20+
f"scores: {self.scores.shape} {np.sum(self.scores):.3f}"
2121
)

tests/python/accuracy/public_scope.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,7 @@
425425
{
426426
"image": "coco128/images/train2017/000000000471.jpg",
427427
"reference": [
428-
"keypoints: (17, 2), keypoints_x_sum: 5700.000, scores: (17,)"
428+
"keypoints: (17, 2), keypoints_x_sum: 5700.000, scores: (17,) 0.049"
429429
]
430430
}
431431
]

0 commit comments

Comments
 (0)