Skip to content

Commit c4c1f52

Browse files
authored
[Enhancement] optimize mmpose postprocess (#1888)
* optimize mmpose postprocess
* remove cv::parallel_for
1 parent d48187c commit c4c1f52

File tree

2 files changed

+45
-71
lines changed

2 files changed

+45
-71
lines changed

csrc/mmdeploy/codebase/mmpose/keypoints_from_heatmap.cpp

Lines changed: 27 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -18,13 +18,6 @@ namespace mmdeploy::mmpose {
1818
using std::string;
1919
using std::vector;
2020

21-
template <class F>
22-
struct _LoopBody : public cv::ParallelLoopBody {
23-
F f_;
24-
_LoopBody(F f) : f_(std::move(f)) {}
25-
void operator()(const cv::Range& range) const override { f_(range); }
26-
};
27-
2821
std::string to_lower(const std::string& s) {
2922
std::string t = s;
3023
std::transform(t.begin(), t.end(), t.begin(), [](unsigned char c) { return std::tolower(c); });
@@ -88,15 +81,11 @@ class TopdownHeatmapBaseHeadDecode : public MMPose {
8881
return to_value(std::move(output));
8982
}
9083

91-
Tensor keypoints_from_heatmap(const Tensor& _heatmap, const vector<float>& center,
84+
Tensor keypoints_from_heatmap(Tensor& heatmap, const vector<float>& center,
9285
const vector<float>& scale, bool unbiased_decoding,
9386
const string& post_process, int modulate_kernel,
9487
float valid_radius_factor, bool use_udp,
9588
const string& target_type) {
96-
Tensor heatmap(_heatmap.desc());
97-
heatmap.CopyFrom(_heatmap, stream()).value();
98-
stream().Wait().value();
99-
10089
int K = heatmap.shape(1);
10190
int H = heatmap.shape(2);
10291
int W = heatmap.shape(3);
@@ -114,14 +103,12 @@ class TopdownHeatmapBaseHeadDecode : public MMPose {
114103
} else if (to_lower(target_type) == to_lower(string("CombinedTarget"))) {
115104
// output channel = 3 * channel_cfg['num_output_channels']
116105
assert(K % 3 == 0);
117-
cv::parallel_for_(cv::Range(0, K), _LoopBody{[&](const cv::Range& r) {
118-
for (int i = r.start; i < r.end; i++) {
119-
int kt = (i % 3 == 0) ? 2 * modulate_kernel + 1 : modulate_kernel;
120-
float* data = heatmap.data<float>() + i * H * W;
121-
cv::Mat work = cv::Mat(H, W, CV_32FC(1), data);
122-
cv::GaussianBlur(work, work, {kt, kt}, 0); // inplace
123-
}
124-
}});
106+
for (int i = 0; i < K; i++) {
107+
int kt = (i % 3 == 0) ? 2 * modulate_kernel + 1 : modulate_kernel;
108+
float* data = heatmap.data<float>() + i * H * W;
109+
cv::Mat work = cv::Mat(H, W, CV_32FC(1), data);
110+
cv::GaussianBlur(work, work, {kt, kt}, 0); // inplace
111+
}
125112
float valid_radius = valid_radius_factor_ * H;
126113
TensorDesc desc = {Device{"cpu"}, DataType::kFLOAT, {1, K / 3, H, W}};
127114
Tensor offset_x(desc);
@@ -209,13 +196,11 @@ class TopdownHeatmapBaseHeadDecode : public MMPose {
209196
int K = heatmap.shape(1);
210197
int H = heatmap.shape(2);
211198
int W = heatmap.shape(3);
212-
cv::parallel_for_(cv::Range(0, K), _LoopBody{[&](const cv::Range& r) {
213-
for (int i = r.start; i < r.end; i++) {
214-
float* data = heatmap.data<float>() + i * H * W;
215-
cv::Mat work = cv::Mat(H, W, CV_32FC(1), data);
216-
cv::GaussianBlur(work, work, {kernel, kernel}, 0); // inplace
217-
}
218-
}});
199+
for (int i = 0; i < K; i++) {
200+
float* data = heatmap.data<float>() + i * H * W;
201+
cv::Mat work = cv::Mat(H, W, CV_32FC(1), data);
202+
cv::GaussianBlur(work, work, {kernel, kernel}, 0); // inplace
203+
}
219204
std::for_each(heatmap.data<float>(), heatmap.data<float>() + K * H * W, [](float& x) {
220205
x = std::max(0.001f, std::min(50.f, x));
221206
x = std::log(x);
@@ -341,23 +326,21 @@ class TopdownHeatmapBaseHeadDecode : public MMPose {
341326
TensorDesc pred_desc = {Device{"cpu"}, DataType::kFLOAT, {1, K, 3}};
342327
Tensor pred(pred_desc);
343328

344-
cv::parallel_for_(cv::Range(0, K), _LoopBody{[&](const cv::Range& r) {
345-
for (int i = r.start; i < r.end; i++) {
346-
float* src_data = const_cast<float*>(heatmap.data<float>()) + i * H * W;
347-
cv::Mat mat = cv::Mat(H, W, CV_32FC1, src_data);
348-
double min_val, max_val;
349-
cv::Point min_loc, max_loc;
350-
cv::minMaxLoc(mat, &min_val, &max_val, &min_loc, &max_loc);
351-
float* dst_data = pred.data<float>() + i * 3;
352-
*(dst_data + 0) = -1;
353-
*(dst_data + 1) = -1;
354-
*(dst_data + 2) = max_val;
355-
if (max_val > 0.0) {
356-
*(dst_data + 0) = max_loc.x;
357-
*(dst_data + 1) = max_loc.y;
358-
}
359-
}
360-
}});
329+
for (int i = 0; i < K; i++) {
330+
float* src_data = const_cast<float*>(heatmap.data<float>()) + i * H * W;
331+
cv::Mat mat = cv::Mat(H, W, CV_32FC1, src_data);
332+
double min_val, max_val;
333+
cv::Point min_loc, max_loc;
334+
cv::minMaxLoc(mat, &min_val, &max_val, &min_loc, &max_loc);
335+
float* dst_data = pred.data<float>() + i * 3;
336+
*(dst_data + 0) = -1;
337+
*(dst_data + 1) = -1;
338+
*(dst_data + 2) = max_val;
339+
if (max_val > 0.0) {
340+
*(dst_data + 0) = max_loc.x;
341+
*(dst_data + 1) = max_loc.y;
342+
}
343+
}
361344

362345
return pred;
363346
}

csrc/mmdeploy/codebase/mmpose/simcc_label.cpp

Lines changed: 18 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -19,13 +19,6 @@ namespace mmdeploy::mmpose {
1919
using std::string;
2020
using std::vector;
2121

22-
template <class F>
23-
struct _LoopBody : public cv::ParallelLoopBody {
24-
F f_;
25-
_LoopBody(F f) : f_(std::move(f)) {}
26-
void operator()(const cv::Range& range) const override { f_(range); }
27-
};
28-
2922
class SimCCLabelDecode : public MMPose {
3023
public:
3124
explicit SimCCLabelDecode(const Value& config) : MMPose(config) {
@@ -89,26 +82,24 @@ class SimCCLabelDecode : public MMPose {
8982
int N_x = simcc_x.shape(2);
9083
int N_y = simcc_y.shape(2);
9184

92-
cv::parallel_for_(cv::Range(0, K), _LoopBody{[&](const cv::Range& r) {
93-
for (int i = r.start; i < r.end; i++) {
94-
float* data_x = const_cast<float*>(simcc_x.data<float>()) + i * N_x;
95-
float* data_y = const_cast<float*>(simcc_y.data<float>()) + i * N_y;
96-
cv::Mat mat_x = cv::Mat(N_x, 1, CV_32FC1, data_x);
97-
cv::Mat mat_y = cv::Mat(N_y, 1, CV_32FC1, data_y);
98-
double min_val_x, max_val_x, min_val_y, max_val_y;
99-
cv::Point min_loc_x, max_loc_x, min_loc_y, max_loc_y;
100-
cv::minMaxLoc(mat_x, &min_val_x, &max_val_x, &min_loc_x, &max_loc_x);
101-
cv::minMaxLoc(mat_y, &min_val_y, &max_val_y, &min_loc_y, &max_loc_y);
102-
float s = max_val_x > max_val_y ? max_val_y : max_val_x;
103-
float x = s > 0 ? max_loc_x.y : -1.0;
104-
float y = s > 0 ? max_loc_y.y : -1.0;
105-
float* keypoints_data = keypoints.data<float>() + i * 2;
106-
float* scores_data = scores.data<float>() + i;
107-
*(scores_data) = s;
108-
*(keypoints_data + 0) = x;
109-
*(keypoints_data + 1) = y;
110-
}
111-
}});
85+
for (int i = 0; i < K; i++) {
86+
float* data_x = const_cast<float*>(simcc_x.data<float>()) + i * N_x;
87+
float* data_y = const_cast<float*>(simcc_y.data<float>()) + i * N_y;
88+
cv::Mat mat_x = cv::Mat(N_x, 1, CV_32FC1, data_x);
89+
cv::Mat mat_y = cv::Mat(N_y, 1, CV_32FC1, data_y);
90+
double min_val_x, max_val_x, min_val_y, max_val_y;
91+
cv::Point min_loc_x, max_loc_x, min_loc_y, max_loc_y;
92+
cv::minMaxLoc(mat_x, &min_val_x, &max_val_x, &min_loc_x, &max_loc_x);
93+
cv::minMaxLoc(mat_y, &min_val_y, &max_val_y, &min_loc_y, &max_loc_y);
94+
float s = max_val_x > max_val_y ? max_val_y : max_val_x;
95+
float x = s > 0 ? max_loc_x.y : -1.0;
96+
float y = s > 0 ? max_loc_y.y : -1.0;
97+
float* keypoints_data = keypoints.data<float>() + i * 2;
98+
float* scores_data = scores.data<float>() + i;
99+
*(scores_data) = s;
100+
*(keypoints_data + 0) = x;
101+
*(keypoints_data + 1) = y;
102+
}
112103
}
113104

114105
private:

0 commit comments

Comments
 (0)