Use only absolute prior boxes explicit sizes. Remove scales attributes. (opencv#10874)

dkurt · vpisarev · commit 8b4871a28d41 · 2018-02-19T17:25:18.000+03:00
* Use only absolute explicit sizes for prior boxes. Remove the scales attribute.

* Simplified PriorBox layer forward pass
diff --git a/modules/dnn/src/layers/prior_box_layer.cpp b/modules/dnn/src/layers/prior_box_layer.cpp
@@ -179,44 +179,62 @@ class PriorBoxLayerImpl : public PriorBoxLayer
     }
 
     PriorBoxLayerImpl(const LayerParams &params)
-        : _boxWidth(0), _boxHeight(0)
     {
         setParamsFrom(params);
         _minSize = getParameter<float>(params, "min_size", 0, false, 0);
         _flip = getParameter<bool>(params, "flip", 0, false, true);
         _clip = getParameter<bool>(params, "clip", 0, false, true);
         _bboxesNormalized = getParameter<bool>(params, "normalized_bbox", 0, false, true);
 
-        _scales.clear();
         _aspectRatios.clear();
 
         getAspectRatios(params);
         getVariance(params);
-        getParams("scales", params, &_scales);
-        getParams("width", params, &_widths);
-        getParams("height", params, &_heights);
-        _explicitSizes = !_widths.empty();
-        CV_Assert(_widths.size() == _heights.size());
+
+        _maxSize = -1;
+        if (params.has("max_size"))
+        {
+            _maxSize = params.get("max_size").get<float>(0);
+            CV_Assert(_maxSize > _minSize);
+        }
+
+        std::vector<float> widths, heights;
+        getParams("width", params, &widths);
+        getParams("height", params, &heights);
+        _explicitSizes = !widths.empty();
+        CV_Assert(widths.size() == heights.size());
 
         if (_explicitSizes)
         {
             CV_Assert(_aspectRatios.empty(), !params.has("min_size"), !params.has("max_size"));
-            _numPriors = _widths.size();
+            _boxWidths = widths;
+            _boxHeights = heights;
         }
         else
         {
             CV_Assert(!_aspectRatios.empty(), _minSize > 0);
-            _numPriors = _aspectRatios.size() + 1;  // + 1 for an aspect ratio 1.0
-        }
+            _boxWidths.resize(1 + (_maxSize > 0 ? 1 : 0) + _aspectRatios.size());
+            _boxHeights.resize(_boxWidths.size());
+            _boxWidths[0] = _boxHeights[0] = _minSize;
 
-        _maxSize = -1;
-        if (params.has("max_size"))
-        {
-            _maxSize = params.get("max_size").get<float>(0);
-            CV_Assert(_maxSize > _minSize);
+            int i = 1;
+            if (_maxSize > 0)
+            {
+                // second prior: aspect_ratio = 1, size = sqrt(min_size * max_size)
+                _boxWidths[i] = _boxHeights[i] = sqrt(_minSize * _maxSize);
+                i += 1;
+            }
 
-            _numPriors += 1;
+            // rest of priors
+            for (size_t r = 0; r < _aspectRatios.size(); ++r)
+            {
+                float arSqrt = sqrt(_aspectRatios[r]);
+                _boxWidths[i + r] = _minSize * arSqrt;
+                _boxHeights[i + r] = _minSize / arSqrt;
+            }
         }
+        CV_Assert(_boxWidths.size() == _boxHeights.size());
+        _numPriors = _boxWidths.size();
 
         if (params.has("step_h") || params.has("step_w")) {
           CV_Assert(!params.has("step"));
@@ -252,8 +270,7 @@ class PriorBoxLayerImpl : public PriorBoxLayer
     virtual bool supportBackend(int backendId)
     {
         return backendId == DNN_BACKEND_DEFAULT ||
-               backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() &&
-              _scales.empty() && !_explicitSizes;
+               backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && !_explicitSizes;
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -307,27 +324,16 @@ class PriorBoxLayerImpl : public PriorBoxLayer
         if (umat_offsetsX.empty())
         {
             Mat offsetsX(1, _offsetsX.size(), CV_32FC1, &_offsetsX[0]);
-            Mat offsetsY(1, _offsetsX.size(), CV_32FC1, &_offsetsY[0]);
-            Mat aspectRatios(1, _aspectRatios.size(), CV_32FC1, &_aspectRatios[0]);
+            Mat offsetsY(1, _offsetsY.size(), CV_32FC1, &_offsetsY[0]);
             Mat variance(1, _variance.size(), CV_32FC1, &_variance[0]);
+            Mat widths(1, _boxWidths.size(), CV_32FC1, &_boxWidths[0]);
+            Mat heights(1, _boxHeights.size(), CV_32FC1, &_boxHeights[0]);
 
             offsetsX.copyTo(umat_offsetsX);
             offsetsY.copyTo(umat_offsetsY);
-            aspectRatios.copyTo(umat_aspectRatios);
             variance.copyTo(umat_variance);
-
-            int real_numPriors = _numPriors >> (_offsetsX.size() - 1);
-            if (_scales.empty())
-            {
-                _scales.resize(real_numPriors, 1.0f);
-                umat_scales = UMat(1, &real_numPriors, CV_32F, 1.0f);
-            }
-            else
-            {
-                CV_Assert(_scales.size() == real_numPriors);
-                Mat scales(1, _scales.size(), CV_32FC1, &_scales[0]);
-                scales.copyTo(umat_scales);
-            }
+            widths.copyTo(umat_widths);
+            heights.copyTo(umat_heights);
         }
 
         size_t nthreads = _layerHeight * _layerWidth;
@@ -336,19 +342,17 @@ class PriorBoxLayerImpl : public PriorBoxLayer
         kernel.set(0, (int)nthreads);
         kernel.set(1, (float)stepX);
         kernel.set(2, (float)stepY);
-        kernel.set(3, (float)_minSize);
-        kernel.set(4, (float)_maxSize);
-        kernel.set(5, ocl::KernelArg::PtrReadOnly(umat_offsetsX));
-        kernel.set(6, ocl::KernelArg::PtrReadOnly(umat_offsetsY));
-        kernel.set(7, (int)_offsetsX.size());
-        kernel.set(8, ocl::KernelArg::PtrReadOnly(umat_aspectRatios));
-        kernel.set(9, (int)_aspectRatios.size());
-        kernel.set(10, ocl::KernelArg::PtrReadOnly(umat_scales));
-        kernel.set(11, ocl::KernelArg::PtrWriteOnly(outputs[0]));
-        kernel.set(12, (int)_layerHeight);
-        kernel.set(13, (int)_layerWidth);
-        kernel.set(14, (int)_imageHeight);
-        kernel.set(15, (int)_imageWidth);
+        kernel.set(3, ocl::KernelArg::PtrReadOnly(umat_offsetsX));
+        kernel.set(4, ocl::KernelArg::PtrReadOnly(umat_offsetsY));
+        kernel.set(5, (int)_offsetsX.size());
+        kernel.set(6, ocl::KernelArg::PtrReadOnly(umat_widths));
+        kernel.set(7, ocl::KernelArg::PtrReadOnly(umat_heights));
+        kernel.set(8, (int)_boxWidths.size());
+        kernel.set(9, ocl::KernelArg::PtrWriteOnly(outputs[0]));
+        kernel.set(10, (int)_layerHeight);
+        kernel.set(11, (int)_layerWidth);
+        kernel.set(12, (int)_imageHeight);
+        kernel.set(13, (int)_imageWidth);
         kernel.run(1, &nthreads, NULL, false);
 
         // clip the prior's coordidate such that it is within [0, 1]
@@ -401,12 +405,6 @@ class PriorBoxLayerImpl : public PriorBoxLayer
 
         CV_Assert(inputs.size() == 2);
 
-        size_t real_numPriors = _numPriors >> (_offsetsX.size() - 1);
-        if (_scales.empty())
-            _scales.resize(real_numPriors, 1.0f);
-        else
-            CV_Assert(_scales.size() == real_numPriors);
-
         int _layerWidth = inputs[0]->size[3];
         int _layerHeight = inputs[0]->size[2];
 
@@ -425,72 +423,15 @@ class PriorBoxLayerImpl : public PriorBoxLayer
         int _outChannelSize = _layerHeight * _layerWidth * _numPriors * 4;
 
         float* outputPtr = outputs[0].ptr<float>();
+        float _boxWidth, _boxHeight;
         for (size_t h = 0; h < _layerHeight; ++h)
         {
             for (size_t w = 0; w < _layerWidth; ++w)
             {
-                // first prior: aspect_ratio = 1, size = min_size
-                if (_explicitSizes)
+                for (size_t i = 0; i < _boxWidths.size(); ++i)
                 {
-                    _boxWidth = _widths[0] * _scales[0];
-                    _boxHeight = _heights[0] * _scales[0];
-                    if (_bboxesNormalized)
-                    {
-                        _boxWidth *= _imageWidth;
-                        _boxHeight *= _imageHeight;
-                    }
-                }
-                else
-                    _boxWidth = _boxHeight = _minSize * _scales[0];
-
-                for (int i = 0; i < _offsetsX.size(); ++i)
-                {
-                    float center_x = (w + _offsetsX[i]) * stepX;
-                    float center_y = (h + _offsetsY[i]) * stepY;
-                    outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth,
-                                         _imageHeight, _bboxesNormalized, outputPtr);
-                }
-                if (_maxSize > 0)
-                {
-                    // second prior: aspect_ratio = 1, size = sqrt(min_size * max_size)
-                    _boxWidth = _boxHeight = sqrt(_minSize * _maxSize) * _scales[1];
-                    for (int i = 0; i < _offsetsX.size(); ++i)
-                    {
-                        float center_x = (w + _offsetsX[i]) * stepX;
-                        float center_y = (h + _offsetsY[i]) * stepY;
-                        outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth,
-                                             _imageHeight, _bboxesNormalized, outputPtr);
-                    }
-                }
-
-                // rest of priors
-                CV_Assert(_aspectRatios.empty() || (_maxSize > 0 ? 2 : 1) + _aspectRatios.size() == _scales.size());
-                for (size_t r = 0; r < _aspectRatios.size(); ++r)
-                {
-                    float ar = _aspectRatios[r];
-                    float scale = _scales[(_maxSize > 0 ? 2 : 1) + r];
-                    _boxWidth = _minSize * sqrt(ar) * scale;
-                    _boxHeight = _minSize / sqrt(ar) * scale;
-                    for (int i = 0; i < _offsetsX.size(); ++i)
-                    {
-                        float center_x = (w + _offsetsX[i]) * stepX;
-                        float center_y = (h + _offsetsY[i]) * stepY;
-                        outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth,
-                                             _imageHeight, _bboxesNormalized, outputPtr);
-                    }
-                }
-
-                // rest of sizes
-                CV_Assert(_widths.empty() || _widths.size() == _scales.size());
-                for (size_t i = 1; i < _widths.size(); ++i)
-                {
-                    _boxWidth = _widths[i] * _scales[i];
-                    _boxHeight = _heights[i] * _scales[i];
-                    if (_bboxesNormalized)
-                    {
-                        _boxWidth *= _imageWidth;
-                        _boxHeight *= _imageHeight;
-                    }
+                    _boxWidth = _boxWidths[i];
+                    _boxHeight = _boxHeights[i];
                     for (int j = 0; j < _offsetsX.size(); ++j)
                     {
                         float center_x = (w + _offsetsX[j]) * stepX;
@@ -591,24 +532,21 @@ class PriorBoxLayerImpl : public PriorBoxLayer
     float _minSize;
     float _maxSize;
 
-    float _boxWidth;
-    float _boxHeight;
-
     float _stepX, _stepY;
 
     std::vector<float> _aspectRatios;
     std::vector<float> _variance;
-    std::vector<float> _scales;
-    std::vector<float> _widths;
-    std::vector<float> _heights;
     std::vector<float> _offsetsX;
     std::vector<float> _offsetsY;
+    // Precomputed final widths and heights based on aspect ratios or explicit sizes.
+    std::vector<float> _boxWidths;
+    std::vector<float> _boxHeights;
 
 #ifdef HAVE_OPENCL
     UMat umat_offsetsX;
     UMat umat_offsetsY;
-    UMat umat_aspectRatios;
-    UMat umat_scales;
+    UMat umat_widths;
+    UMat umat_heights;
     UMat umat_variance;
 #endif
 
diff --git a/modules/dnn/src/opencl/prior_box.cl b/modules/dnn/src/opencl/prior_box.cl
@@ -45,14 +45,12 @@
 __kernel void prior_box(const int nthreads,
                         const Dtype stepX,
                         const Dtype stepY,
-                        const Dtype _minSize,
-                        const Dtype _maxSize,
                         __global const Dtype* _offsetsX,
                         __global const Dtype* _offsetsY,
                         const int offsetsX_size,
-                        __global const Dtype* _aspectRatios,
-                        const int aspectRatios_size,
-                        __global const Dtype* scales,
+                        __global const Dtype* _widths,
+                        __global const Dtype* _heights,
+                        const int widths_size,
                         __global Dtype* dst,
                         const int _layerHeight,
                         const int _layerWidth,
@@ -64,57 +62,19 @@ __kernel void prior_box(const int nthreads,
         int w = index % _layerWidth;
         int h = index / _layerWidth;
         __global Dtype* outputPtr;
-        int aspect_count = (_maxSize > 0) ? 1 : 0;
-        outputPtr = dst + index * 4 * offsetsX_size * (1 + aspect_count + aspectRatios_size);
+
+        outputPtr = dst + index * 4 * offsetsX_size * widths_size;
 
         Dtype _boxWidth, _boxHeight;
         Dtype4 vec;
-        _boxWidth = _boxHeight = _minSize * scales[0];
-        for (int i = 0; i < offsetsX_size; ++i)
-        {
-            float center_x = (w + _offsetsX[i]) * stepX;
-            float center_y = (h + _offsetsY[i]) * stepY;
-
-            vec.x = (center_x - _boxWidth * 0.5f) / imgWidth;    // xmin
-            vec.y = (center_y - _boxHeight * 0.5f) / imgHeight;  // ymin
-            vec.z = (center_x + _boxWidth * 0.5f) / imgWidth;    // xmax
-            vec.w = (center_y + _boxHeight * 0.5f) / imgHeight;  // ymax
-            vstore4(vec, 0, outputPtr);
-
-            outputPtr += 4;
-        }
-
-        if (_maxSize > 0)
-        {
-            _boxWidth = _boxHeight = native_sqrt(_minSize * _maxSize) * scales[1];
-
-            for (int i = 0; i < offsetsX_size; ++i)
-            {
-                float center_x = (w + _offsetsX[i]) * stepX;
-                float center_y = (h + _offsetsY[i]) * stepY;
-
-                vec.x = (center_x - _boxWidth * 0.5f) / imgWidth;    // xmin
-                vec.y = (center_y - _boxHeight * 0.5f) / imgHeight;  // ymin
-                vec.z = (center_x + _boxWidth * 0.5f) / imgWidth;    // xmax
-                vec.w = (center_y + _boxHeight * 0.5f) / imgHeight;  // ymax
-                vstore4(vec, 0, outputPtr);
-
-                outputPtr += 4;
-            }
-        }
-
-        for (int r = 0; r < aspectRatios_size; ++r)
+        for (int i = 0; i < widths_size; ++i)
         {
-            float ar = native_sqrt(_aspectRatios[r]);
-            float scale = scales[(_maxSize > 0 ? 2 : 1) + r];
-
-            _boxWidth = _minSize * ar * scale;
-            _boxHeight = _minSize / ar * scale;
-
-            for (int i = 0; i < offsetsX_size; ++i)
+            _boxWidth = _widths[i];
+            _boxHeight = _heights[i];
+            for (int j = 0; j < offsetsX_size; ++j)
             {
-                float center_x = (w + _offsetsX[i]) * stepX;
-                float center_y = (h + _offsetsY[i]) * stepY;
+                float center_x = (w + _offsetsX[j]) * stepX;
+                float center_y = (h + _offsetsY[j]) * stepY;
 
                 vec.x = (center_x - _boxWidth * 0.5f) / imgWidth;    // xmin
                 vec.y = (center_y - _boxHeight * 0.5f) / imgHeight;  // ymin
diff --git a/samples/dnn/tf_text_graph_ssd.py b/samples/dnn/tf_text_graph_ssd.py
@@ -26,6 +26,8 @@
 parser.add_argument('--num_layers', default=6, type=int, help='Hyper-parameter of ssd_anchor_generator from config file.')
 parser.add_argument('--aspect_ratios', default=[1.0, 2.0, 0.5, 3.0, 0.333], type=float, nargs='+',
                     help='Hyper-parameter of ssd_anchor_generator from config file.')
+parser.add_argument('--image_width', default=300, type=int, help='Training images width.')
+parser.add_argument('--image_height', default=300, type=int, help='Training images height.')
 args = parser.parse_args()
 
 # Nodes that should be kept.
@@ -192,7 +194,6 @@ def tensorMsg(values):
 
     text_format.Merge('b: false', priorBox.attr["flip"])
     text_format.Merge('b: false', priorBox.attr["clip"])
-    text_format.Merge('b: true', priorBox.attr["normalized_bbox"])
 
     if i == 0:
         widths = [args.min_scale * 0.5, args.min_scale * sqrt(2.0), args.min_scale * sqrt(0.5)]
@@ -203,6 +204,8 @@ def tensorMsg(values):
 
         widths += [sqrt(scales[i] * scales[i + 1])]
         heights += [sqrt(scales[i] * scales[i + 1])]
+    widths = [w * args.image_width for w in widths]
+    heights = [h * args.image_height for h in heights]
     text_format.Merge(tensorMsg(widths), priorBox.attr["width"])
     text_format.Merge(tensorMsg(heights), priorBox.attr["height"])
     text_format.Merge(tensorMsg([0.1, 0.1, 0.2, 0.2]), priorBox.attr["variance"])