Skip to content

Commit 8b4871a

Browse files
dkurtvpisarev
authored and committed
Use only absolute prior boxes explicit sizes. Remove scales attributes. (opencv#10874)
* Use only absolute prior boxes explicit sizes. Remove scales attributes.
* Simplified PriorBox layer forward pass
1 parent 88b689b commit 8b4871a

File tree

3 files changed

+75
-174
lines changed

3 files changed

+75
-174
lines changed

modules/dnn/src/layers/prior_box_layer.cpp

Lines changed: 60 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -179,44 +179,62 @@ class PriorBoxLayerImpl : public PriorBoxLayer
179179
}
180180

181181
PriorBoxLayerImpl(const LayerParams &params)
182-
: _boxWidth(0), _boxHeight(0)
183182
{
184183
setParamsFrom(params);
185184
_minSize = getParameter<float>(params, "min_size", 0, false, 0);
186185
_flip = getParameter<bool>(params, "flip", 0, false, true);
187186
_clip = getParameter<bool>(params, "clip", 0, false, true);
188187
_bboxesNormalized = getParameter<bool>(params, "normalized_bbox", 0, false, true);
189188

190-
_scales.clear();
191189
_aspectRatios.clear();
192190

193191
getAspectRatios(params);
194192
getVariance(params);
195-
getParams("scales", params, &_scales);
196-
getParams("width", params, &_widths);
197-
getParams("height", params, &_heights);
198-
_explicitSizes = !_widths.empty();
199-
CV_Assert(_widths.size() == _heights.size());
193+
194+
_maxSize = -1;
195+
if (params.has("max_size"))
196+
{
197+
_maxSize = params.get("max_size").get<float>(0);
198+
CV_Assert(_maxSize > _minSize);
199+
}
200+
201+
std::vector<float> widths, heights;
202+
getParams("width", params, &widths);
203+
getParams("height", params, &heights);
204+
_explicitSizes = !widths.empty();
205+
CV_Assert(widths.size() == heights.size());
200206

201207
if (_explicitSizes)
202208
{
203209
CV_Assert(_aspectRatios.empty(), !params.has("min_size"), !params.has("max_size"));
204-
_numPriors = _widths.size();
210+
_boxWidths = widths;
211+
_boxHeights = heights;
205212
}
206213
else
207214
{
208215
CV_Assert(!_aspectRatios.empty(), _minSize > 0);
209-
_numPriors = _aspectRatios.size() + 1; // + 1 for an aspect ratio 1.0
210-
}
216+
_boxWidths.resize(1 + (_maxSize > 0 ? 1 : 0) + _aspectRatios.size());
217+
_boxHeights.resize(_boxWidths.size());
218+
_boxWidths[0] = _boxHeights[0] = _minSize;
211219

212-
_maxSize = -1;
213-
if (params.has("max_size"))
214-
{
215-
_maxSize = params.get("max_size").get<float>(0);
216-
CV_Assert(_maxSize > _minSize);
220+
int i = 1;
221+
if (_maxSize > 0)
222+
{
223+
// second prior: aspect_ratio = 1, size = sqrt(min_size * max_size)
224+
_boxWidths[i] = _boxHeights[i] = sqrt(_minSize * _maxSize);
225+
i += 1;
226+
}
217227

218-
_numPriors += 1;
228+
// rest of priors
229+
for (size_t r = 0; r < _aspectRatios.size(); ++r)
230+
{
231+
float arSqrt = sqrt(_aspectRatios[r]);
232+
_boxWidths[i + r] = _minSize * arSqrt;
233+
_boxHeights[i + r] = _minSize / arSqrt;
234+
}
219235
}
236+
CV_Assert(_boxWidths.size() == _boxHeights.size());
237+
_numPriors = _boxWidths.size();
220238

221239
if (params.has("step_h") || params.has("step_w")) {
222240
CV_Assert(!params.has("step"));
@@ -252,8 +270,7 @@ class PriorBoxLayerImpl : public PriorBoxLayer
252270
virtual bool supportBackend(int backendId)
253271
{
254272
return backendId == DNN_BACKEND_DEFAULT ||
255-
backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() &&
256-
_scales.empty() && !_explicitSizes;
273+
backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && !_explicitSizes;
257274
}
258275

259276
bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -307,27 +324,16 @@ class PriorBoxLayerImpl : public PriorBoxLayer
307324
if (umat_offsetsX.empty())
308325
{
309326
Mat offsetsX(1, _offsetsX.size(), CV_32FC1, &_offsetsX[0]);
310-
Mat offsetsY(1, _offsetsX.size(), CV_32FC1, &_offsetsY[0]);
311-
Mat aspectRatios(1, _aspectRatios.size(), CV_32FC1, &_aspectRatios[0]);
327+
Mat offsetsY(1, _offsetsY.size(), CV_32FC1, &_offsetsY[0]);
312328
Mat variance(1, _variance.size(), CV_32FC1, &_variance[0]);
329+
Mat widths(1, _boxWidths.size(), CV_32FC1, &_boxWidths[0]);
330+
Mat heights(1, _boxHeights.size(), CV_32FC1, &_boxHeights[0]);
313331

314332
offsetsX.copyTo(umat_offsetsX);
315333
offsetsY.copyTo(umat_offsetsY);
316-
aspectRatios.copyTo(umat_aspectRatios);
317334
variance.copyTo(umat_variance);
318-
319-
int real_numPriors = _numPriors >> (_offsetsX.size() - 1);
320-
if (_scales.empty())
321-
{
322-
_scales.resize(real_numPriors, 1.0f);
323-
umat_scales = UMat(1, &real_numPriors, CV_32F, 1.0f);
324-
}
325-
else
326-
{
327-
CV_Assert(_scales.size() == real_numPriors);
328-
Mat scales(1, _scales.size(), CV_32FC1, &_scales[0]);
329-
scales.copyTo(umat_scales);
330-
}
335+
widths.copyTo(umat_widths);
336+
heights.copyTo(umat_heights);
331337
}
332338

333339
size_t nthreads = _layerHeight * _layerWidth;
@@ -336,19 +342,17 @@ class PriorBoxLayerImpl : public PriorBoxLayer
336342
kernel.set(0, (int)nthreads);
337343
kernel.set(1, (float)stepX);
338344
kernel.set(2, (float)stepY);
339-
kernel.set(3, (float)_minSize);
340-
kernel.set(4, (float)_maxSize);
341-
kernel.set(5, ocl::KernelArg::PtrReadOnly(umat_offsetsX));
342-
kernel.set(6, ocl::KernelArg::PtrReadOnly(umat_offsetsY));
343-
kernel.set(7, (int)_offsetsX.size());
344-
kernel.set(8, ocl::KernelArg::PtrReadOnly(umat_aspectRatios));
345-
kernel.set(9, (int)_aspectRatios.size());
346-
kernel.set(10, ocl::KernelArg::PtrReadOnly(umat_scales));
347-
kernel.set(11, ocl::KernelArg::PtrWriteOnly(outputs[0]));
348-
kernel.set(12, (int)_layerHeight);
349-
kernel.set(13, (int)_layerWidth);
350-
kernel.set(14, (int)_imageHeight);
351-
kernel.set(15, (int)_imageWidth);
345+
kernel.set(3, ocl::KernelArg::PtrReadOnly(umat_offsetsX));
346+
kernel.set(4, ocl::KernelArg::PtrReadOnly(umat_offsetsY));
347+
kernel.set(5, (int)_offsetsX.size());
348+
kernel.set(6, ocl::KernelArg::PtrReadOnly(umat_widths));
349+
kernel.set(7, ocl::KernelArg::PtrReadOnly(umat_heights));
350+
kernel.set(8, (int)_boxWidths.size());
351+
kernel.set(9, ocl::KernelArg::PtrWriteOnly(outputs[0]));
352+
kernel.set(10, (int)_layerHeight);
353+
kernel.set(11, (int)_layerWidth);
354+
kernel.set(12, (int)_imageHeight);
355+
kernel.set(13, (int)_imageWidth);
352356
kernel.run(1, &nthreads, NULL, false);
353357

354358
// clip the prior's coordinate such that it is within [0, 1]
@@ -401,12 +405,6 @@ class PriorBoxLayerImpl : public PriorBoxLayer
401405

402406
CV_Assert(inputs.size() == 2);
403407

404-
size_t real_numPriors = _numPriors >> (_offsetsX.size() - 1);
405-
if (_scales.empty())
406-
_scales.resize(real_numPriors, 1.0f);
407-
else
408-
CV_Assert(_scales.size() == real_numPriors);
409-
410408
int _layerWidth = inputs[0]->size[3];
411409
int _layerHeight = inputs[0]->size[2];
412410

@@ -425,72 +423,15 @@ class PriorBoxLayerImpl : public PriorBoxLayer
425423
int _outChannelSize = _layerHeight * _layerWidth * _numPriors * 4;
426424

427425
float* outputPtr = outputs[0].ptr<float>();
426+
float _boxWidth, _boxHeight;
428427
for (size_t h = 0; h < _layerHeight; ++h)
429428
{
430429
for (size_t w = 0; w < _layerWidth; ++w)
431430
{
432-
// first prior: aspect_ratio = 1, size = min_size
433-
if (_explicitSizes)
431+
for (size_t i = 0; i < _boxWidths.size(); ++i)
434432
{
435-
_boxWidth = _widths[0] * _scales[0];
436-
_boxHeight = _heights[0] * _scales[0];
437-
if (_bboxesNormalized)
438-
{
439-
_boxWidth *= _imageWidth;
440-
_boxHeight *= _imageHeight;
441-
}
442-
}
443-
else
444-
_boxWidth = _boxHeight = _minSize * _scales[0];
445-
446-
for (int i = 0; i < _offsetsX.size(); ++i)
447-
{
448-
float center_x = (w + _offsetsX[i]) * stepX;
449-
float center_y = (h + _offsetsY[i]) * stepY;
450-
outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth,
451-
_imageHeight, _bboxesNormalized, outputPtr);
452-
}
453-
if (_maxSize > 0)
454-
{
455-
// second prior: aspect_ratio = 1, size = sqrt(min_size * max_size)
456-
_boxWidth = _boxHeight = sqrt(_minSize * _maxSize) * _scales[1];
457-
for (int i = 0; i < _offsetsX.size(); ++i)
458-
{
459-
float center_x = (w + _offsetsX[i]) * stepX;
460-
float center_y = (h + _offsetsY[i]) * stepY;
461-
outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth,
462-
_imageHeight, _bboxesNormalized, outputPtr);
463-
}
464-
}
465-
466-
// rest of priors
467-
CV_Assert(_aspectRatios.empty() || (_maxSize > 0 ? 2 : 1) + _aspectRatios.size() == _scales.size());
468-
for (size_t r = 0; r < _aspectRatios.size(); ++r)
469-
{
470-
float ar = _aspectRatios[r];
471-
float scale = _scales[(_maxSize > 0 ? 2 : 1) + r];
472-
_boxWidth = _minSize * sqrt(ar) * scale;
473-
_boxHeight = _minSize / sqrt(ar) * scale;
474-
for (int i = 0; i < _offsetsX.size(); ++i)
475-
{
476-
float center_x = (w + _offsetsX[i]) * stepX;
477-
float center_y = (h + _offsetsY[i]) * stepY;
478-
outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth,
479-
_imageHeight, _bboxesNormalized, outputPtr);
480-
}
481-
}
482-
483-
// rest of sizes
484-
CV_Assert(_widths.empty() || _widths.size() == _scales.size());
485-
for (size_t i = 1; i < _widths.size(); ++i)
486-
{
487-
_boxWidth = _widths[i] * _scales[i];
488-
_boxHeight = _heights[i] * _scales[i];
489-
if (_bboxesNormalized)
490-
{
491-
_boxWidth *= _imageWidth;
492-
_boxHeight *= _imageHeight;
493-
}
433+
_boxWidth = _boxWidths[i];
434+
_boxHeight = _boxHeights[i];
494435
for (int j = 0; j < _offsetsX.size(); ++j)
495436
{
496437
float center_x = (w + _offsetsX[j]) * stepX;
@@ -591,24 +532,21 @@ class PriorBoxLayerImpl : public PriorBoxLayer
591532
float _minSize;
592533
float _maxSize;
593534

594-
float _boxWidth;
595-
float _boxHeight;
596-
597535
float _stepX, _stepY;
598536

599537
std::vector<float> _aspectRatios;
600538
std::vector<float> _variance;
601-
std::vector<float> _scales;
602-
std::vector<float> _widths;
603-
std::vector<float> _heights;
604539
std::vector<float> _offsetsX;
605540
std::vector<float> _offsetsY;
541+
// Precomputed final widths and heights based on aspect ratios or explicit sizes.
542+
std::vector<float> _boxWidths;
543+
std::vector<float> _boxHeights;
606544

607545
#ifdef HAVE_OPENCL
608546
UMat umat_offsetsX;
609547
UMat umat_offsetsY;
610-
UMat umat_aspectRatios;
611-
UMat umat_scales;
548+
UMat umat_widths;
549+
UMat umat_heights;
612550
UMat umat_variance;
613551
#endif
614552

modules/dnn/src/opencl/prior_box.cl

Lines changed: 11 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -45,14 +45,12 @@
4545
__kernel void prior_box(const int nthreads,
4646
const Dtype stepX,
4747
const Dtype stepY,
48-
const Dtype _minSize,
49-
const Dtype _maxSize,
5048
__global const Dtype* _offsetsX,
5149
__global const Dtype* _offsetsY,
5250
const int offsetsX_size,
53-
__global const Dtype* _aspectRatios,
54-
const int aspectRatios_size,
55-
__global const Dtype* scales,
51+
__global const Dtype* _widths,
52+
__global const Dtype* _heights,
53+
const int widths_size,
5654
__global Dtype* dst,
5755
const int _layerHeight,
5856
const int _layerWidth,
@@ -64,57 +62,19 @@ __kernel void prior_box(const int nthreads,
6462
int w = index % _layerWidth;
6563
int h = index / _layerWidth;
6664
__global Dtype* outputPtr;
67-
int aspect_count = (_maxSize > 0) ? 1 : 0;
68-
outputPtr = dst + index * 4 * offsetsX_size * (1 + aspect_count + aspectRatios_size);
65+
66+
outputPtr = dst + index * 4 * offsetsX_size * widths_size;
6967

7068
Dtype _boxWidth, _boxHeight;
7169
Dtype4 vec;
72-
_boxWidth = _boxHeight = _minSize * scales[0];
73-
for (int i = 0; i < offsetsX_size; ++i)
74-
{
75-
float center_x = (w + _offsetsX[i]) * stepX;
76-
float center_y = (h + _offsetsY[i]) * stepY;
77-
78-
vec.x = (center_x - _boxWidth * 0.5f) / imgWidth; // xmin
79-
vec.y = (center_y - _boxHeight * 0.5f) / imgHeight; // ymin
80-
vec.z = (center_x + _boxWidth * 0.5f) / imgWidth; // xmax
81-
vec.w = (center_y + _boxHeight * 0.5f) / imgHeight; // ymax
82-
vstore4(vec, 0, outputPtr);
83-
84-
outputPtr += 4;
85-
}
86-
87-
if (_maxSize > 0)
88-
{
89-
_boxWidth = _boxHeight = native_sqrt(_minSize * _maxSize) * scales[1];
90-
91-
for (int i = 0; i < offsetsX_size; ++i)
92-
{
93-
float center_x = (w + _offsetsX[i]) * stepX;
94-
float center_y = (h + _offsetsY[i]) * stepY;
95-
96-
vec.x = (center_x - _boxWidth * 0.5f) / imgWidth; // xmin
97-
vec.y = (center_y - _boxHeight * 0.5f) / imgHeight; // ymin
98-
vec.z = (center_x + _boxWidth * 0.5f) / imgWidth; // xmax
99-
vec.w = (center_y + _boxHeight * 0.5f) / imgHeight; // ymax
100-
vstore4(vec, 0, outputPtr);
101-
102-
outputPtr += 4;
103-
}
104-
}
105-
106-
for (int r = 0; r < aspectRatios_size; ++r)
70+
for (int i = 0; i < widths_size; ++i)
10771
{
108-
float ar = native_sqrt(_aspectRatios[r]);
109-
float scale = scales[(_maxSize > 0 ? 2 : 1) + r];
110-
111-
_boxWidth = _minSize * ar * scale;
112-
_boxHeight = _minSize / ar * scale;
113-
114-
for (int i = 0; i < offsetsX_size; ++i)
72+
_boxWidth = _widths[i];
73+
_boxHeight = _heights[i];
74+
for (int j = 0; j < offsetsX_size; ++j)
11575
{
116-
float center_x = (w + _offsetsX[i]) * stepX;
117-
float center_y = (h + _offsetsY[i]) * stepY;
76+
float center_x = (w + _offsetsX[j]) * stepX;
77+
float center_y = (h + _offsetsY[j]) * stepY;
11878

11979
vec.x = (center_x - _boxWidth * 0.5f) / imgWidth; // xmin
12080
vec.y = (center_y - _boxHeight * 0.5f) / imgHeight; // ymin

samples/dnn/tf_text_graph_ssd.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
parser.add_argument('--num_layers', default=6, type=int, help='Hyper-parameter of ssd_anchor_generator from config file.')
2727
parser.add_argument('--aspect_ratios', default=[1.0, 2.0, 0.5, 3.0, 0.333], type=float, nargs='+',
2828
help='Hyper-parameter of ssd_anchor_generator from config file.')
29+
parser.add_argument('--image_width', default=300, type=int, help='Training images width.')
30+
parser.add_argument('--image_height', default=300, type=int, help='Training images height.')
2931
args = parser.parse_args()
3032

3133
# Nodes that should be kept.
@@ -192,7 +194,6 @@ def tensorMsg(values):
192194

193195
text_format.Merge('b: false', priorBox.attr["flip"])
194196
text_format.Merge('b: false', priorBox.attr["clip"])
195-
text_format.Merge('b: true', priorBox.attr["normalized_bbox"])
196197

197198
if i == 0:
198199
widths = [args.min_scale * 0.5, args.min_scale * sqrt(2.0), args.min_scale * sqrt(0.5)]
@@ -203,6 +204,8 @@ def tensorMsg(values):
203204

204205
widths += [sqrt(scales[i] * scales[i + 1])]
205206
heights += [sqrt(scales[i] * scales[i + 1])]
207+
widths = [w * args.image_width for w in widths]
208+
heights = [h * args.image_height for h in heights]
206209
text_format.Merge(tensorMsg(widths), priorBox.attr["width"])
207210
text_format.Merge(tensorMsg(heights), priorBox.attr["height"])
208211
text_format.Merge(tensorMsg([0.1, 0.1, 0.2, 0.2]), priorBox.attr["variance"])

0 commit comments

Comments
 (0)