Commit 9333a62

Author: Bai Yifan

Add flatten op interface and enhance APIs about detection to support variable-length image. (#12422)

* add flatten api & enhance detection api
* unify shape_op data type
* update API.spec
1 parent f276006 commit 9333a62

8 files changed: +115 additions, -23 deletions


paddle/fluid/API.spec

Lines changed: 1 addition & 0 deletions

@@ -159,6 +159,7 @@ paddle.fluid.layers.relu ArgSpec(args=['x'], varargs=None, keywords=None, defaul
 paddle.fluid.layers.log ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.crop ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
 paddle.fluid.layers.rank_loss ArgSpec(args=['label', 'left', 'right', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.flatten ArgSpec(args=['x', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None))
 paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
 paddle.fluid.layers.open_recordio_file ArgSpec(args=['filename', 'shapes', 'lod_levels', 'dtypes', 'pass_num', 'for_parallel'], varargs=None, keywords=None, defaults=(1, True))
 paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
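
The new spec entry corresponds to the Python call below. This is a minimal sketch of the recorded signature and its defaults, not code from the commit (the data layer is illustrative):

import paddle.fluid as fluid

x = fluid.layers.data(name="x", shape=[16, 16, 3], dtype="float32")
# defaults=(1, None) in the ArgSpec means these two calls are equivalent:
out_a = fluid.layers.flatten(x)
out_b = fluid.layers.flatten(x, axis=1, name=None)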
Binary file changed (-16 KB): binary file not shown.

paddle/fluid/operators/shape_op.cc

Lines changed: 2 additions & 2 deletions

@@ -38,7 +38,7 @@ class ShapeOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("Input", "(Tensor), The input tensor.");
     AddOutput("Out",
               "(Tensor), The shape of input tensor, the data type of the shape"
-              " is int64_t, will be on the same device with the input Tensor.");
+              " is int32_t, will be on the same device with the input Tensor.");
     AddComment(R"DOC(
 Shape Operator
 
@@ -53,5 +53,5 @@ Get the shape of input tensor. Only support CPU input Tensor now.
 namespace ops = paddle::operators;
 REGISTER_OPERATOR(shape, ops::ShapeOp, ops::ShapeOpMaker,
                   paddle::framework::EmptyGradOpMaker);
-REGISTER_OP_CPU_KERNEL(shape, ops::ShapeKernel<int>, ops::ShapeKernel<int64_t>,
+REGISTER_OP_CPU_KERNEL(shape, ops::ShapeKernel<int>, ops::ShapeKernel<int32_t>,
                        ops::ShapeKernel<float>, ops::ShapeKernel<double>);
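
The user-visible effect of this kernel change is the dtype of the shape op's output. A hedged sketch of observing it from Python (executor boilerplate assumed, not part of the commit):

import numpy
import paddle.fluid as fluid

x = fluid.layers.data(
    name="x", shape=[2, 3, 4], append_batch_size=False, dtype="float32")
s = fluid.layers.shape(x)  # a tensor holding [2, 3, 4]

exe = fluid.Executor(fluid.CPUPlace())
out, = exe.run(feed={"x": numpy.zeros((2, 3, 4), "float32")}, fetch_list=[s])
print(out.dtype)  # int32 after this commit; int64 before it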

paddle/fluid/operators/shape_op.cu

Lines changed: 1 addition & 1 deletion

@@ -15,6 +15,6 @@ limitations under the License. */
 #include "paddle/fluid/operators/shape_op.h"
 
 REGISTER_OP_CUDA_KERNEL(shape, paddle::operators::ShapeKernel<int>,
-                        paddle::operators::ShapeKernel<int64_t>,
+                        paddle::operators::ShapeKernel<int32_t>,
                         paddle::operators::ShapeKernel<float>,
                         paddle::operators::ShapeKernel<double>);

paddle/fluid/operators/shape_op.h

Lines changed: 1 addition & 1 deletion

@@ -27,7 +27,7 @@ class ShapeKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto* in_t = ctx.Input<Tensor>("Input");
     auto* out_t = ctx.Output<Tensor>("Out");
-    auto out_data = out_t->mutable_data<int64_t>(platform::CPUPlace());
+    auto out_data = out_t->mutable_data<int32_t>(platform::CPUPlace());
     auto in_dims = in_t->dims();
     for (int i = 0; i < in_dims.size(); ++i) {
       out_data[i] = in_dims[i];
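
With the output unified to int32, the shape op's result composes directly with the int32-based shape utilities the detection changes below rely on. A distilled, illustrative sketch of that pattern (variable names are mine, not the commit's):

import paddle.fluid as fluid
from paddle.fluid.layers import ops

num_classes = 21
# An [N, Np, C] tensor whose batch and prior counts are fixed only at run time.
confidence = fluid.layers.data(
    name="conf", shape=[-1, -1, num_classes],
    append_batch_size=False, dtype="float32")

conf_shape = ops.shape(confidence)  # int32 tensor holding [N, Np, C]
# Keep the leading [N, Np] pair; slice consumes the int32 shape as-is,
# yielding the runtime target for reshape's actual_shape argument.
run_shape = ops.slice(conf_shape, axes=[0], starts=[0], ends=[2])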

python/paddle/fluid/layers/detection.py

Lines changed: 31 additions & 19 deletions

@@ -20,7 +20,9 @@
 from ..layer_helper import LayerHelper
 from . import tensor
 from . import nn
+from . import ops
 import math
+import numpy
 from functools import reduce
 
 __all__ = [
@@ -264,10 +266,11 @@ class number, M is number of bounding boxes. For each category
         prior_box_var=prior_box_var,
         target_box=loc,
         code_type='decode_center_size')
-    old_shape = scores.shape
-    scores = nn.reshape(x=scores, shape=(-1, old_shape[-1]))
+    compile_shape = scores.shape
+    run_shape = ops.shape(scores)
+    scores = nn.flatten(x=scores, axis=2)
     scores = nn.softmax(input=scores)
-    scores = nn.reshape(x=scores, shape=old_shape)
+    scores = nn.reshape(x=scores, shape=compile_shape, actual_shape=run_shape)
     scores = nn.transpose(scores, perm=[0, 2, 1])
     scores.stop_gradient = True
     nmsed_outs = helper.create_tmp_variable(dtype=decoded_box.dtype)
@@ -677,9 +680,10 @@ def ssd_loss(location,
         raise ValueError("Only support mining_type == max_negative now.")
 
     num, num_prior, num_class = confidence.shape
+    conf_shape = ops.shape(confidence)
 
     def __reshape_to_2d(var):
-        return nn.reshape(x=var, shape=[-1, var.shape[-1]])
+        return nn.flatten(x=var, axis=2)
 
     # 1. Find matched boundding box by prior box.
     #   1.1 Compute IOU similarity between ground-truth boxes and prior boxes.
@@ -690,7 +694,8 @@ def __reshape_to_2d(var):
 
     # 2. Compute confidence for mining hard examples
     # 2.1. Get the target label based on matched indices
-    gt_label = nn.reshape(x=gt_label, shape=gt_label.shape + (1, ))
+    gt_label = nn.reshape(
+        x=gt_label, shape=(len(gt_label.shape) - 1) * (0, ) + (-1, 1))
     gt_label.stop_gradient = True
     target_label, _ = target_assign(
         gt_label, matched_indices, mismatch_value=background_label)
@@ -701,9 +706,12 @@ def __reshape_to_2d(var):
     target_label = __reshape_to_2d(target_label)
     target_label.stop_gradient = True
     conf_loss = nn.softmax_with_cross_entropy(confidence, target_label)
-
     # 3. Mining hard examples
-    conf_loss = nn.reshape(x=conf_loss, shape=(num, num_prior))
+    conf_loss = nn.reshape(
+        x=conf_loss,
+        shape=(num, num_prior),
+        actual_shape=ops.slice(
+            conf_shape, axes=[0], starts=[0], ends=[2]))
     conf_loss.stop_gradient = True
     neg_indices = helper.create_tmp_variable(dtype='int32')
     dtype = matched_indices.dtype
@@ -772,7 +780,11 @@ def __reshape_to_2d(var):
     # 5.3 Compute overall weighted loss.
     loss = conf_loss_weight * conf_loss + loc_loss_weight * loc_loss
     # reshape to [N, Np], N is the batch size and Np is the prior box number.
-    loss = nn.reshape(x=loss, shape=[-1, num_prior])
+    loss = nn.reshape(
+        x=loss,
+        shape=(num, num_prior),
+        actual_shape=ops.slice(
+            conf_shape, axes=[0], starts=[0], ends=[2]))
     loss = nn.reduce_sum(loss, dim=1, keep_dim=True)
     if normalize:
         normalizer = nn.reduce_sum(target_loc_weight)
@@ -1005,13 +1017,7 @@ def multi_box_head(inputs,
     """
 
     def _reshape_with_axis_(input, axis=1):
-        if not (axis > 0 and axis < len(input.shape)):
-            raise ValueError("The axis should be smaller than "
-                             "the arity of input and bigger than 0.")
-        new_shape = [
-            -1, reduce(lambda x, y: x * y, input.shape[axis:len(input.shape)])
-        ]
-        out = nn.reshape(x=input, shape=new_shape)
+        out = nn.flatten(x=input, axis=axis)
         return out
 
     def _is_list_or_tuple_(data):
@@ -1101,11 +1107,13 @@ def _is_list_or_tuple_and_equal(data, length, err_info):
             stride=stride)
 
         mbox_loc = nn.transpose(mbox_loc, perm=[0, 2, 3, 1])
-        new_shape = [
+        compile_shape = [
            mbox_loc.shape[0],
            mbox_loc.shape[1] * mbox_loc.shape[2] * mbox_loc.shape[3] / 4, 4
        ]
-        mbox_loc_flatten = nn.reshape(mbox_loc, shape=new_shape)
+        run_shape = tensor.assign(numpy.array([0, -1, 4]).astype("int32"))
+        mbox_loc_flatten = nn.reshape(
+            mbox_loc, shape=compile_shape, actual_shape=run_shape)
         mbox_locs.append(mbox_loc_flatten)
 
         # get conf
@@ -1117,11 +1125,15 @@ def _is_list_or_tuple_and_equal(data, length, err_info):
             padding=pad,
             stride=stride)
         conf_loc = nn.transpose(conf_loc, perm=[0, 2, 3, 1])
-        new_shape = [
+        new_shape = [0, -1, num_classes]
+        compile_shape = [
            conf_loc.shape[0], conf_loc.shape[1] * conf_loc.shape[2] *
            conf_loc.shape[3] / num_classes, num_classes
        ]
-        conf_loc_flatten = nn.reshape(conf_loc, shape=new_shape)
+        run_shape = tensor.assign(
+            numpy.array([0, -1, num_classes]).astype("int32"))
+        conf_loc_flatten = nn.reshape(
+            conf_loc, shape=compile_shape, actual_shape=run_shape)
         mbox_confs.append(conf_loc_flatten)
 
         if len(box_results) == 1:
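
The recurring pattern in this file: record the compile-time shape, capture the true shape at run time, flatten so rank-2 ops can run on variable-sized input, then restore the layout through reshape's actual_shape. A hedged, self-contained distillation (sizes and names are illustrative, not the commit's):

import paddle.fluid as fluid
from paddle.fluid.layers import nn, ops

scores = fluid.layers.data(
    name="scores", shape=[-1, 8732, 21], append_batch_size=False, dtype="float32")

compile_shape = scores.shape    # static hint; may contain -1 placeholders
run_shape = ops.shape(scores)   # concrete int32 shape, known only at run time
flat = nn.flatten(x=scores, axis=2)   # [N * Np, C] for any runtime N
probs = nn.softmax(input=flat)        # softmax over the class axis
# Restore [N, Np, C]: compile_shape is checked statically, run_shape wins at run time.
probs = nn.reshape(x=probs, shape=compile_shape, actual_shape=run_shape)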

python/paddle/fluid/layers/nn.py

Lines changed: 68 additions & 0 deletions

@@ -112,6 +112,7 @@
     'log',
     'crop',
     'rank_loss',
+    'flatten',
 ]
 
 
@@ -5361,3 +5362,70 @@ def rank_loss(label, left, right, name=None):
                 "Right": right},
         outputs={'Out': out})
     return out
+
+
+def flatten(x, axis=1, name=None):
+    """
+    **Flatten layer**
+    Flattens the input tensor into a 2D matrix.
+
+    Examples:
+    Case 1:
+      Given
+        X.shape = (3, 100, 100, 4)
+      and
+        axis = 2
+      We get:
+        Out.shape = (3 * 100, 4 * 100)
+
+    Case 2:
+      Given
+        X.shape = (3, 100, 100, 4)
+      and
+        axis = 0
+      We get:
+        Out.shape = (1, 3 * 100 * 100 * 4)
+
+    Args:
+        x (Variable): A tensor of rank >= axis.
+        axis (int): Indicate up to which input dimensions (exclusive) should
+                    be flattened to the outer dimension of the output.
+                    The value for axis must be in the range [0, R], where R
+                    is the rank of the input tensor. When axis = 0, the shape
+                    of the output tensor is (1, (d_0 X d_1 ... d_n), where the
+                    shape of the input tensor is (d_0, d_1, ... d_n).
+        name(str|None): A name for this layer(optional). If set None, the layer
+                        will be named automatically.
+
+    Returns:
+        Variable: A 2D tensor with the contents of the input tensor, with input
+                  dimensions up to axis flattened to the outer dimension of
+                  the output and remaining input dimensions flattened into the
+                  inner dimension of the output.
+
+    Raises:
+        ValueError: If x is not a variable.
+        ValueError: If axis is not in range [0, rank(x)].
+
+    Examples:
+
+        .. code-block:: python
+
+            x = fluid.layers.data(name="x", shape=[4, 4, 3], dtype="float32")
+            out = fluid.layers.flatten(x=x, axis=2)
+    """
+    helper = LayerHelper('flatten', **locals())
+
+    if not (isinstance(x, Variable)):
+        raise ValueError("The input x should be a Variable")
+
+    if not (isinstance(axis, int)) or axis > len(x.shape) or axis < 0:
+        raise ValueError("The axis should be a int, and in range [0, rank(x)]")
+
+    out = helper.create_tmp_variable(x.dtype)
+    helper.append_op(
+        type='flatten',
+        inputs={"X": x},
+        outputs={'Out': out},
+        attrs={"axis": axis})
+    return out
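
As a quick check of Case 1 in the docstring above, a minimal end-to-end sketch (executor boilerplate assumed, not part of the commit):

import numpy
import paddle.fluid as fluid

x = fluid.layers.data(
    name="x", shape=[3, 100, 100, 4], append_batch_size=False, dtype="float32")
out = fluid.layers.flatten(x=x, axis=2)  # dims before axis 2 -> rows, the rest -> cols

exe = fluid.Executor(fluid.CPUPlace())
res, = exe.run(feed={"x": numpy.zeros((3, 100, 100, 4), "float32")},
               fetch_list=[out])
print(res.shape)  # (300, 400), i.e. (3 * 100, 100 * 4)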

python/paddle/fluid/tests/unittests/test_layers.py

Lines changed: 11 additions & 0 deletions

@@ -465,6 +465,17 @@ def test_rank_loss(self):
         self.assertIsNotNone(out)
         print(str(program))
 
+    def test_flatten(self):
+        program = Program()
+        with program_guard(program):
+            x = layers.data(
+                name='x',
+                append_batch_size=False,
+                shape=[4, 4, 3],
+                dtype="float32")
+            out = layers.flatten(x, axis=1, name="flatten")
+        self.assertIsNotNone(out)
+
     def test_shape(self):
         program = Program()
         with program_guard(program):
