|
1 |
| -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. |
| 1 | +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. |
2 | 2 | #
|
3 | 3 | # Licensed under the Apache License, Version 2.0 (the "License");
|
4 | 4 | # you may not use this file except in compliance with the License.
|
|
16 | 16 | """
|
17 | 17 |
|
18 | 18 | from ..layer_helper import LayerHelper
|
| 19 | +from ..framework import Variable |
| 20 | +from tensor import concat |
| 21 | +from ops import reshape |
| 22 | +import math |
19 | 23 |
|
20 |
# Public API of this module.
__all__ = ['detection_output', 'prior_box']
21 | 28 |
|
22 | 29 |
|
23 | 30 | def detection_output(scores,
|
@@ -114,3 +121,208 @@ class number, M is number of bounding boxes. For each category
|
114 | 121 | 'nms_eta': 1.0
|
115 | 122 | })
|
116 | 123 | return nmsed_outs
|
| 124 | + |
| 125 | + |
def prior_box(inputs,
              image,
              min_ratio,
              max_ratio,
              aspect_ratios,
              base_size,
              steps=None,
              step_w=None,
              step_h=None,
              offset=0.5,
              variance=[0.1, 0.1, 0.1, 0.1],
              flip=False,
              clip=False,
              min_sizes=None,
              max_sizes=None,
              name=None):
    """
    **Prior_boxes**

    Generate prior boxes for SSD(Single Shot MultiBox Detector)
    algorithm. The details of this algorithm, please refer the
    section 2.2 of SSD paper (SSD: Single Shot MultiBox Detector)
    <https://arxiv.org/abs/1512.02325>`_ .

    Args:
        inputs(list): The list of input Variables, the format
            of all Variables is NCHW.
        image(Variable): The input image data of PriorBoxOp,
            the layout is NCHW.
        min_ratio(int): the min ratio of generated prior boxes.
        max_ratio(int): the max ratio of generated prior boxes.
        aspect_ratios(list): the aspect ratios of generated prior
            boxes. The length of input and aspect_ratios must be equal.
        base_size(int): the base_size is used to get min_size
            and max_size according to min_ratio and max_ratio.
        steps(list, optional, default=None): Prior boxes step across
            width and height. If given, it overrides both step_w and
            step_h with the same per-layer values.
        step_w(list, optional, default=None): Prior boxes step
            across width. If step_w[i] == 0.0, the prior boxes step
            across width of the inputs[i] will be automatically calculated.
        step_h(list, optional, default=None): Prior boxes step
            across height, If step_h[i] == 0.0, the prior boxes
            step across height of the inputs[i] will be automatically calculated.
        offset(float, optional, default=0.5): Prior boxes center offset.
        variance(list, optional, default=[0.1, 0.1, 0.1, 0.1]): the variances
            to be encoded in prior boxes. Read-only here, so the mutable
            default is safe.
        flip(bool, optional, default=False): Whether to flip
            aspect ratios.
        clip(bool, optional, default=False): Whether to clip
            out-of-boundary boxes.
        min_sizes(list, optional, default=None): If `len(inputs) <=2`,
            min_sizes must be set up, and the length of min_sizes
            should equal to the length of inputs.
        max_sizes(list, optional, default=None): If `len(inputs) <=2`,
            max_sizes must be set up, and the length of max_sizes
            should equal to the length of inputs.
        name(str, optional, None): Name of the prior box layer.

    Returns:
        boxes(Variable): the output prior boxes of PriorBoxOp.
            The layout is [num_priors, 4]. num_priors is the total
            box count of each position of inputs.
        Variances(Variable): the expanded variances of PriorBoxOp.
            The layout is [num_priors, 4]. num_priors is the total
            box count of each position of inputs

    Examples:
        .. code-block:: python

          prior_box(
             inputs = [conv1, conv2, conv3, conv4, conv5, conv6],
             image = data,
             min_ratio = 20, # 0.20
             max_ratio = 90, # 0.90
             offset = 0.5,
             base_size = 300,
             variance = [0.1,0.1,0.1,0.1],
             aspect_ratios = [[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]],
             flip=True,
             clip=True)
    """

    def _prior_box_(input,
                    image,
                    min_sizes,
                    max_sizes,
                    aspect_ratios,
                    variance,
                    flip=False,
                    clip=False,
                    step_w=0.0,
                    step_h=0.0,
                    offset=0.5,
                    name=None):
        # Append one prior_box op for a single feature map and return
        # its (boxes, variances) output pair.
        helper = LayerHelper("prior_box", **locals())
        dtype = helper.input_dtype()

        box = helper.create_tmp_variable(dtype)
        var = helper.create_tmp_variable(dtype)
        helper.append_op(
            type="prior_box",
            inputs={"Input": input,
                    "Image": image},
            outputs={"Boxes": box,
                     "Variances": var},
            attrs={
                'min_sizes': min_sizes,
                'max_sizes': max_sizes,
                'aspect_ratios': aspect_ratios,
                'variances': variance,
                'flip': flip,
                'clip': clip,
                'step_w': step_w,
                'step_h': step_h,
                'offset': offset
            })
        return box, var

    def _reshape_with_axis_(input, axis=1):
        # Flatten all dimensions from `axis` onward into a single
        # trailing dimension; the leading dimensions collapse into -1.
        if not (axis > 0 and axis < len(input.shape)):
            raise ValueError("The axis should be smaller than "
                             "the arity of input and bigger than 0.")
        new_shape = [
            -1, reduce(lambda x, y: x * y, input.shape[axis:len(input.shape)])
        ]
        out = reshape(x=input, shape=new_shape)
        return out

    assert isinstance(inputs, list), 'inputs should be a list.'
    num_layer = len(inputs)

    if num_layer <= 2:
        # Too few layers to interpolate sizes from ratios; the caller
        # must provide them explicitly.
        assert min_sizes is not None and max_sizes is not None
        assert len(min_sizes) == num_layer and len(max_sizes) == num_layer
    else:
        # Derive per-layer min/max sizes by evenly spacing ratios in
        # [min_ratio, max_ratio] (section 2.2 of the SSD paper); the
        # first layer gets fixed 0.10/0.20 scales.
        min_sizes = []
        max_sizes = []
        step = int(math.floor(((max_ratio - min_ratio)) / (num_layer - 2)))
        for ratio in xrange(min_ratio, max_ratio + 1, step):
            min_sizes.append(base_size * ratio / 100.)
            max_sizes.append(base_size * (ratio + step) / 100.)
        min_sizes = [base_size * .10] + min_sizes
        max_sizes = [base_size * .20] + max_sizes

    if aspect_ratios:
        if not (isinstance(aspect_ratios, list) and
                len(aspect_ratios) == num_layer):
            raise ValueError(
                'aspect_ratios should be list and the length of inputs '
                'and aspect_ratios should be the same.')
    if step_h:
        if not (isinstance(step_h, list) and len(step_h) == num_layer):
            raise ValueError(
                'step_h should be list and the length of inputs and '
                'step_h should be the same.')
    if step_w:
        if not (isinstance(step_w, list) and len(step_w) == num_layer):
            raise ValueError(
                'step_w should be list and the length of inputs and '
                'step_w should be the same.')
    if steps:
        if not (isinstance(steps, list) and len(steps) == num_layer):
            raise ValueError(
                'steps should be list and the length of inputs and '
                'steps should be the same.')
        # `steps` is a shorthand for identical per-layer width/height steps.
        step_w = steps
        step_h = steps

    box_results = []
    var_results = []
    for i, input in enumerate(inputs):
        min_size = min_sizes[i]
        max_size = max_sizes[i]
        aspect_ratio = []
        # The op expects list-valued attributes; wrap scalars.
        if not isinstance(min_size, list):
            min_size = [min_size]
        if not isinstance(max_size, list):
            max_size = [max_size]
        if aspect_ratios:
            aspect_ratio = aspect_ratios[i]
            if not isinstance(aspect_ratio, list):
                aspect_ratio = [aspect_ratio]

        # BUG FIX: the step_h argument previously tested `step_w`'s
        # truthiness (`step_h[i] if step_w else 0.0`), so a caller
        # passing only step_h had it silently ignored.
        box, var = _prior_box_(input, image, min_size, max_size, aspect_ratio,
                               variance, flip, clip,
                               step_w[i] if step_w else 0.0,
                               step_h[i] if step_h else 0.0, offset)

        box_results.append(box)
        var_results.append(var)

    if len(box_results) == 1:
        box = box_results[0]
        var = var_results[0]
    else:
        # Flatten each layer's [H, W, num_priors, 4] output to [-1, 4]
        # and concatenate all layers into a single box/variance tensor.
        reshaped_boxes = []
        reshaped_vars = []
        for i in range(len(box_results)):
            reshaped_boxes.append(_reshape_with_axis_(box_results[i], axis=3))
            reshaped_vars.append(_reshape_with_axis_(var_results[i], axis=3))

        box = concat(reshaped_boxes)
        var = concat(reshaped_vars)

    return box, var
0 commit comments