Skip to content

Commit 546eefa

Browse files
authored
Merge pull request #15591 from lidanqing-intel/lidanqing/density_prior_box
Optimize density_prior_box_op.h in the detection model for CPU
2 parents dc5e25f + 4b3c661 commit 546eefa

File tree

1 file changed

+38
-26
lines changed

1 file changed

+38
-26
lines changed

paddle/fluid/operators/detection/density_prior_box_op.h

Lines changed: 38 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,10 @@ class DensityPriorBoxOpKernel : public framework::OpKernel<T> {
5252
step_height = step_h;
5353
}
5454
int num_priors = 0;
55+
56+
#ifdef PADDLE_WITH_MKLML
57+
#pragma omp parallel for reduction(+ : num_priors)
58+
#endif
5559
for (size_t i = 0; i < densities.size(); ++i) {
5660
num_priors += (fixed_ratios.size()) * (pow(densities[i], 2));
5761
}
@@ -64,6 +68,17 @@ class DensityPriorBoxOpKernel : public framework::OpKernel<T> {
6468
auto e_boxes = framework::EigenTensor<T, 4>::From(*boxes).setConstant(0.0);
6569
int step_average = static_cast<int>((step_width + step_height) * 0.5);
6670

71+
std::vector<float> sqrt_fixed_ratios;
72+
#ifdef PADDLE_WITH_MKLML
73+
#pragma omp parallel for
74+
#endif
75+
for (int i = 0; i < fixed_ratios.size(); i++) {
76+
sqrt_fixed_ratios.push_back(sqrt(fixed_ratios[i]));
77+
}
78+
79+
#ifdef PADDLE_WITH_MKLML
80+
#pragma omp parallel for collapse(2)
81+
#endif
6782
for (int h = 0; h < feature_height; ++h) {
6883
for (int w = 0; w < feature_width; ++w) {
6984
T center_x = (w + offset) * step_width;
@@ -73,34 +88,25 @@ class DensityPriorBoxOpKernel : public framework::OpKernel<T> {
7388
for (size_t s = 0; s < fixed_sizes.size(); ++s) {
7489
auto fixed_size = fixed_sizes[s];
7590
int density = densities[s];
91+
int shift = step_average / density;
7692
// Generate density prior boxes with fixed ratios.
7793
for (size_t r = 0; r < fixed_ratios.size(); ++r) {
78-
float ar = fixed_ratios[r];
79-
int shift = step_average / density;
80-
float box_width_ratio = fixed_size * sqrt(ar);
81-
float box_height_ratio = fixed_size / sqrt(ar);
94+
float box_width_ratio = fixed_size * sqrt_fixed_ratios[r];
95+
float box_height_ratio = fixed_size / sqrt_fixed_ratios[r];
96+
float density_center_x = center_x - step_average / 2. + shift / 2.;
97+
float density_center_y = center_y - step_average / 2. + shift / 2.;
8298
for (int di = 0; di < density; ++di) {
8399
for (int dj = 0; dj < density; ++dj) {
84-
float center_x_temp =
85-
center_x - step_average / 2. + shift / 2. + dj * shift;
86-
float center_y_temp =
87-
center_y - step_average / 2. + shift / 2. + di * shift;
88-
e_boxes(h, w, idx, 0) =
89-
(center_x_temp - box_width_ratio / 2.) / img_width >= 0
90-
? (center_x_temp - box_width_ratio / 2.) / img_width
91-
: 0;
92-
e_boxes(h, w, idx, 1) =
93-
(center_y_temp - box_height_ratio / 2.) / img_height >= 0
94-
? (center_y_temp - box_height_ratio / 2.) / img_height
95-
: 0;
96-
e_boxes(h, w, idx, 2) =
97-
(center_x_temp + box_width_ratio / 2.) / img_width <= 1
98-
? (center_x_temp + box_width_ratio / 2.) / img_width
99-
: 1;
100-
e_boxes(h, w, idx, 3) =
101-
(center_y_temp + box_height_ratio / 2.) / img_height <= 1
102-
? (center_y_temp + box_height_ratio / 2.) / img_height
103-
: 1;
100+
float center_x_temp = density_center_x + dj * shift;
101+
float center_y_temp = density_center_y + di * shift;
102+
e_boxes(h, w, idx, 0) = std::max(
103+
(center_x_temp - box_width_ratio / 2.) / img_width, 0.);
104+
e_boxes(h, w, idx, 1) = std::max(
105+
(center_y_temp - box_height_ratio / 2.) / img_height, 0.);
106+
e_boxes(h, w, idx, 2) = std::min(
107+
(center_x_temp + box_width_ratio / 2.) / img_width, 1.);
108+
e_boxes(h, w, idx, 3) = std::min(
109+
(center_y_temp + box_height_ratio / 2.) / img_height, 1.);
104110
idx++;
105111
}
106112
}
@@ -131,8 +137,14 @@ class DensityPriorBoxOpKernel : public framework::OpKernel<T> {
131137
vars->Resize({box_num, static_cast<int>(variances.size())});
132138

133139
auto e_vars = framework::EigenMatrix<T, Eigen::RowMajor>::From(*vars);
134-
135-
e_vars = var_et.broadcast(Eigen::DSizes<int, 2>(box_num, 1));
140+
#ifdef PADDLE_WITH_MKLML
141+
#pragma omp parallel for collapse(2)
142+
#endif
143+
for (int i = 0; i < box_num; ++i) {
144+
for (int j = 0; j < variances.size(); ++j) {
145+
e_vars(i, j) = variances[j];
146+
}
147+
}
136148

137149
vars->Resize(var_dim);
138150
boxes->Resize(box_dim);

0 commit comments

Comments
 (0)