|
1 |
| -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. |
| 1 | +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. |
2 | 2 | #
|
3 | 3 | # Licensed under the Apache License, Version 2.0 (the "License");
|
4 | 4 | # you may not use this file except in compliance with the License.
|
|
16 | 16 | """
|
17 | 17 |
|
18 | 18 | from ..layer_helper import LayerHelper
|
| 19 | +from ..framework import Variable |
| 20 | +from tensor import concat |
| 21 | +from ops import reshape |
| 22 | +import math |
19 | 23 |
|
20 |
# Public API of this module.
__all__ = ['detection_output', 'prior_box']
21 | 28 |
|
22 | 29 |
|
23 | 30 | def detection_output(scores,
|
@@ -114,3 +121,208 @@ class number, M is number of bounding boxes. For each category
|
114 | 121 | 'nms_eta': 1.0
|
115 | 122 | })
|
116 | 123 | return nmsed_outs
|
| 124 | + |
| 125 | + |
def prior_box(inputs,
              image,
              min_ratio,
              max_ratio,
              aspect_ratios,
              base_size,
              steps=None,
              step_w=None,
              step_h=None,
              offset=0.5,
              variance=[0.1, 0.1, 0.1, 0.1],
              flip=False,
              clip=False,
              min_sizes=None,
              max_sizes=None,
              name=None):
    """
    **Prior_boxes**

    Generate prior boxes for SSD(Single Shot MultiBox Detector)
    algorithm. The details of this algorithm, please refer the
    section 2.2 of SSD paper (SSD: Single Shot MultiBox Detector)
    <https://arxiv.org/abs/1512.02325>`_ .

    Args:
        inputs(list): The list of input Variables, the format
            of all Variables is NCHW.
        image(Variable): The input image data of PriorBoxOp,
            the layout is NCHW.
        min_ratio(int): the min ratio of generated prior boxes.
        max_ratio(int): the max ratio of generated prior boxes.
        aspect_ratios(list): the aspect ratios of generated prior
            boxes. The length of input and aspect_ratios must be equal.
        base_size(int): the base_size is used to get min_size
            and max_size according to min_ratio and max_ratio.
        steps(list, optional, default=None): Prior boxes step across
            width and height. If given, it overrides both step_w and
            step_h with the same per-layer values.
        step_w(list, optional, default=None): Prior boxes step
            across width. If step_w[i] == 0.0, the prior boxes step
            across width of the inputs[i] will be automatically calculated.
        step_h(list, optional, default=None): Prior boxes step
            across height, If step_h[i] == 0.0, the prior boxes
            step across height of the inputs[i] will be automatically calculated.
        offset(float, optional, default=0.5): Prior boxes center offset.
        variance(list, optional, default=[0.1, 0.1, 0.1, 0.1]): the variances
            to be encoded in prior boxes. Read-only here, so the mutable
            default is safe.
        flip(bool, optional, default=False): Whether to flip
            aspect ratios.
        clip(bool, optional, default=False): Whether to clip
            out-of-boundary boxes.
        min_sizes(list, optional, default=None): If `len(inputs) <=2`,
            min_sizes must be set up, and the length of min_sizes
            should equal to the length of inputs.
        max_sizes(list, optional, default=None): If `len(inputs) <=2`,
            max_sizes must be set up, and the length of max_sizes
            should equal to the length of inputs.
        name(str, optional, None): Name of the prior box layer.

    Returns:
        boxes(Variable): the output prior boxes of PriorBoxOp.
            The layout is [num_priors, 4]. num_priors is the total
            box count of each position of inputs.
        Variances(Variable): the expanded variances of PriorBoxOp.
            The layout is [num_priors, 4]. num_priors is the total
            box count of each position of inputs

    Examples:
        .. code-block:: python

          prior_box(
             inputs = [conv1, conv2, conv3, conv4, conv5, conv6],
             image = data,
             min_ratio = 20, # 0.20
             max_ratio = 90, # 0.90
             offset = 0.5,
             base_size = 300,
             variance = [0.1,0.1,0.1,0.1],
             aspect_ratios = [[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]],
             flip=True,
             clip=True)
    """

    def _prior_box_(input,
                    image,
                    min_sizes,
                    max_sizes,
                    aspect_ratios,
                    variance,
                    flip=False,
                    clip=False,
                    step_w=0.0,
                    step_h=0.0,
                    offset=0.5,
                    name=None):
        # Append one prior_box op for a single feature map and return
        # its (boxes, variances) output pair.
        helper = LayerHelper("prior_box", **locals())
        dtype = helper.input_dtype()

        box = helper.create_tmp_variable(dtype)
        var = helper.create_tmp_variable(dtype)
        helper.append_op(
            type="prior_box",
            inputs={"Input": input,
                    "Image": image},
            outputs={"Boxes": box,
                     "Variances": var},
            attrs={
                'min_sizes': min_sizes,
                'max_sizes': max_sizes,
                'aspect_ratios': aspect_ratios,
                'variances': variance,
                'flip': flip,
                'clip': clip,
                'step_w': step_w,
                'step_h': step_h,
                'offset': offset
            })
        return box, var

    def _reshape_with_axis_(input, axis=1):
        # Flatten all dimensions from `axis` onward into a single
        # trailing dimension; the leading dimensions collapse into -1.
        if not (axis > 0 and axis < len(input.shape)):
            raise ValueError("The axis should be smaller than "
                             "the arity of input and bigger than 0.")
        new_shape = [
            -1, reduce(lambda x, y: x * y, input.shape[axis:len(input.shape)])
        ]
        out = reshape(x=input, shape=new_shape)
        return out

    assert isinstance(inputs, list), 'inputs should be a list.'
    num_layer = len(inputs)

    if num_layer <= 2:
        # Too few layers to interpolate sizes from ratios; the caller
        # must provide them explicitly.
        assert min_sizes is not None and max_sizes is not None
        assert len(min_sizes) == num_layer and len(max_sizes) == num_layer
    else:
        # Derive per-layer min/max sizes by evenly spacing ratios in
        # [min_ratio, max_ratio] (section 2.2 of the SSD paper); the
        # first layer gets fixed 0.10/0.20 scales.
        min_sizes = []
        max_sizes = []
        step = int(math.floor(((max_ratio - min_ratio)) / (num_layer - 2)))
        for ratio in xrange(min_ratio, max_ratio + 1, step):
            min_sizes.append(base_size * ratio / 100.)
            max_sizes.append(base_size * (ratio + step) / 100.)
        min_sizes = [base_size * .10] + min_sizes
        max_sizes = [base_size * .20] + max_sizes

    if aspect_ratios:
        if not (isinstance(aspect_ratios, list) and
                len(aspect_ratios) == num_layer):
            raise ValueError(
                'aspect_ratios should be list and the length of inputs '
                'and aspect_ratios should be the same.')
    if step_h:
        if not (isinstance(step_h, list) and len(step_h) == num_layer):
            raise ValueError(
                'step_h should be list and the length of inputs and '
                'step_h should be the same.')
    if step_w:
        if not (isinstance(step_w, list) and len(step_w) == num_layer):
            raise ValueError(
                'step_w should be list and the length of inputs and '
                'step_w should be the same.')
    if steps:
        if not (isinstance(steps, list) and len(steps) == num_layer):
            raise ValueError(
                'steps should be list and the length of inputs and '
                'steps should be the same.')
        # `steps` is a shorthand for identical per-layer width/height steps.
        step_w = steps
        step_h = steps

    box_results = []
    var_results = []
    for i, input in enumerate(inputs):
        min_size = min_sizes[i]
        max_size = max_sizes[i]
        aspect_ratio = []
        # The op expects list-valued attributes; wrap scalars.
        if not isinstance(min_size, list):
            min_size = [min_size]
        if not isinstance(max_size, list):
            max_size = [max_size]
        if aspect_ratios:
            aspect_ratio = aspect_ratios[i]
            if not isinstance(aspect_ratio, list):
                aspect_ratio = [aspect_ratio]

        # BUG FIX: the step_h argument previously tested `step_w`'s
        # truthiness (`step_h[i] if step_w else 0.0`), so a caller
        # passing only step_h had it silently ignored.
        box, var = _prior_box_(input, image, min_size, max_size, aspect_ratio,
                               variance, flip, clip,
                               step_w[i] if step_w else 0.0,
                               step_h[i] if step_h else 0.0, offset)

        box_results.append(box)
        var_results.append(var)

    if len(box_results) == 1:
        box = box_results[0]
        var = var_results[0]
    else:
        # Flatten each layer's [H, W, num_priors, 4] output to [-1, 4]
        # and concatenate all layers into a single box/variance tensor.
        reshaped_boxes = []
        reshaped_vars = []
        for i in range(len(box_results)):
            reshaped_boxes.append(_reshape_with_axis_(box_results[i], axis=3))
            reshaped_vars.append(_reshape_with_axis_(var_results[i], axis=3))

        box = concat(reshaped_boxes)
        var = concat(reshaped_vars)

    return box, var
0 commit comments