
Commit e9b8ebf

Author: xuwei06 (committed)

Correctly handle variables with a batch dimension in math ops.

When the second argument carries a batch dimension, the broadcast axis should be 0. Also makes elementwise ops more tolerant when handling tensors with trailing singleton dimensions.

1 parent 7d56c6d commit e9b8ebf
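
In user-facing terms, the change affects Python-level arithmetic on Variables whose first dimension is the batch dimension (-1). A minimal sketch of the usage the updated test below exercises, assuming the fluid API of this era:

import numpy
import paddle.fluid as fluid

a = fluid.layers.data(name="a", shape=[1])            # runtime shape [-1, 1]; -1 is the batch dimension
b = a + 10                                            # scalar rhs is filled batch-size-like
d = fluid.layers.concat(input=[a, b], axis=1) + a     # rhs carries a batch dimension, so axis must be 0

exe = fluid.Executor(fluid.CPUPlace())
a_np = numpy.random.random(size=[10, 1]).astype('float32')
d_np, = exe.run(fluid.default_main_program(), feed={"a": a_np}, fetch_list=[d])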

File tree

6 files changed: +94, −74 lines


paddle/fluid/operators/elementwise_op.h

Lines changed: 7 additions & 1 deletion
@@ -65,12 +65,17 @@ smaller than or equal to the dimensions of $X$.
 
 There are two cases for this operator:
 1. The shape of $Y$ is same with $X$;
-2. The shape of $Y$ is a subset of $X$.
+2. The shape of $Y$ is a contiguous subsequence of $X$. The trailing dimensions
+   of size 1 for $Y$ will be ignored for the consideration of subsequence.
+
 
 For case 2:
+
 $Y$ will be broadcasted to match the shape of $X$ and axis should be
 set to index of the start dimension to broadcast $Y$ onto $X$.
 
+If axis is -1, it is treated as axis=rank(X)-rank(Y).
+
 For example
   .. code-block:: python
 
@@ -79,6 +84,7 @@ For example
     shape(X) = (2, 3, 4, 5), shape(Y) = (4, 5)
     shape(X) = (2, 3, 4, 5), shape(Y) = (3, 4), with axis=1
     shape(X) = (2, 3, 4, 5), shape(Y) = (2), with axis=0
+    shape(X) = (2, 3, 4, 5), shape(Y) = (2, 1), with axis=0
 
 Either of the inputs $X$ and $Y$ or none can carry the LoD (Level of Details)
 information. However, the output only shares the LoD information with input $X$.
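
The broadcasting rule documented above can be modeled with a short numpy sketch; broadcast_add is illustrative only and not part of the operator implementation:

import numpy as np

def broadcast_add(x, y, axis=-1):
    # Align Y with X starting at `axis`, ignoring Y's trailing dimensions of size 1.
    y_shape = list(y.shape)
    while y_shape and y_shape[-1] == 1:
        y_shape.pop()
    if axis == -1:
        axis = x.ndim - len(y_shape)
    full_shape = [1] * axis + y_shape + [1] * (x.ndim - axis - len(y_shape))
    return x + y.reshape(full_shape)

x = np.random.rand(2, 3, 4, 5)
out = broadcast_add(x, np.random.rand(4, 5))           # axis defaults to rank(X) - rank(Y) = 2
out = broadcast_add(x, np.random.rand(2, 1), axis=0)   # the new case: same as y.reshape(2, 1, 1, 1)
assert out.shape == (2, 3, 4, 5)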

paddle/fluid/operators/elementwise_op_function.h

Lines changed: 20 additions & 59 deletions
@@ -61,6 +61,19 @@ inline void get_mid_dims(const framework::DDim& x_dims,
   }
 }
 
+inline void trim_trailing_singular_dims(framework::DDim& dims) {
+  // Remove trailing dimensions of size 1 for y
+  auto actual_dims_size = dims.size();
+  for (; actual_dims_size != 0; --actual_dims_size) {
+    if (dims[actual_dims_size - 1] != 1) break;
+  }
+  if (actual_dims_size != dims.size()) {
+    auto actual_dims = framework::vectorize(dims);
+    actual_dims.resize(actual_dims_size);
+    dims = framework::make_ddim(actual_dims);
+  }
+}
+
 template <typename T, typename DeviceContext>
 class RowwiseTransformIterator;
 template <typename T, typename DeviceContext>
@@ -263,44 +276,6 @@ class TransformFunctor {
   }                                              \
   }
 
-template <class functor, typename DeviceContext, typename T>
-void ElementwiseCompute(const framework::ExecutionContext& ctx) {
-  using Tensor = framework::Tensor;
-
-  auto* x = ctx.Input<Tensor>("X");
-  auto* y = ctx.Input<Tensor>("Y");
-  auto* z = ctx.Output<Tensor>("Out");
-  z->mutable_data<T>(ctx.GetPlace());
-
-  auto x_dims = x->dims();
-  auto y_dims = y->dims();
-  PADDLE_ENFORCE_GE(x_dims.size(), y_dims.size(),
-                    "Rank of first input must >= rank of second input.");
-
-  if (x_dims == y_dims) {
-    functor f;
-    f.template Run<DeviceContext, T>(x, y, z, ctx);
-    return;
-  }
-
-  int axis = ctx.Attr<int>("axis");
-  axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);
-  PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
-                 "Axis should be in range [0, x_dims)");
-
-  int pre, n, post;
-  get_mid_dims(x_dims, y_dims, axis, pre, n, post);
-  if (post == 1) {
-    functor f;
-    f.template RunBroadCast<DeviceContext, T>(x, y, z, ctx, pre, n);
-    return;
-  } else {
-    functor f;
-    f.template RunBroadCast2<DeviceContext, T>(x, y, z, ctx, pre, n, post);
-    return;
-  }
-}
-
 #define EIGEN_ADD(x, y) ((x) + (y))
 EIGEN_FUNCTOR(Add, EIGEN_ADD);
@@ -516,14 +491,10 @@ void ElemwiseGradCompute(const framework::ExecutionContext& ctx,
   auto x_dim = x.dims();
   auto y_dim = y.dims();
 
-  if (y_dim.size() == 1 && y_dim[0] == 1) {
-    // y is a scalar
-    auto extended_dims = framework::vectorize(x_dim);
-    extended_dims.push_back(1);
-    x_dim = framework::make_ddim(extended_dims);
-  }
-
   axis = (axis == -1 ? x_dim.size() - y_dim.size() : axis);
+  trim_trailing_singular_dims(y_dim);
+  axis = (y_dim.size() == 0) ? x_dim.size() : axis;
+
   int pre, n, post;
   get_mid_dims(x_dim, y_dim, axis, pre, n, post);
   if (post == 1) {
@@ -591,14 +562,9 @@ void ElementwiseGradCompute(const framework::ExecutionContext& ctx,
     return;
   }
 
-  if (y_dims.size() == 1 && y_dims[0] == 1) {
-    // y is a scalar
-    auto extended_dims = framework::vectorize(x_dims);
-    extended_dims.push_back(1);
-    x_dims = framework::make_ddim(extended_dims);
-  }
-
   axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);
+  trim_trailing_singular_dims(y_dims);
+  axis = (y_dims.size() == 0) ? x_dims.size() : axis;
 
   int pre, n, post;
   get_mid_dims(x_dims, y_dims, axis, pre, n, post);
@@ -633,16 +599,11 @@ void ElementwiseComputeEx(const framework::ExecutionContext& ctx,
     return;
   }
 
-  if (y_dims.size() == 1 && y_dims[0] == 1) {
-    // y is a scalar
-    auto extended_dims = framework::vectorize(x_dims);
-    extended_dims.push_back(1);
-    x_dims = framework::make_ddim(extended_dims);
-  }
-
   axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);
   PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
                  "Axis should be in range [0, x_dims)");
+  trim_trailing_singular_dims(y_dims);
+  axis = (y_dims.size() == 0) ? x_dims.size() : axis;
 
   int pre, n, post;
   get_mid_dims(x_dims, y_dims, axis, pre, n, post);
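
For reference, trim_trailing_singular_dims and the (pre, n, post) split that get_mid_dims produces can be approximated in Python as below; this is a sketch inferred from how the C++ code uses them, not the actual implementation:

from functools import reduce
import operator

def trim_trailing_singular_dims(dims):
    # Drop trailing dimensions of size 1, mirroring the new C++ helper.
    dims = list(dims)
    while dims and dims[-1] == 1:
        dims.pop()
    return dims

def get_mid_dims(x_dims, y_dims, axis):
    # pre = product of X's dims before Y, n = dims matched by Y, post = dims after Y.
    # post == 1 selects the cheaper rowwise broadcast path in the kernels above.
    prod = lambda s: reduce(operator.mul, s, 1)
    pre = prod(x_dims[:axis])
    n = prod(x_dims[axis:axis + len(y_dims)])
    post = prod(x_dims[axis + len(y_dims):])
    return pre, n, post

y = trim_trailing_singular_dims((2, 1))        # -> [2]
print(get_mid_dims((2, 3, 4, 5), y, 0))        # (1, 2, 60)
print(get_mid_dims((2, 3, 4, 5), (4, 5), 2))   # (6, 20, 1): post == 1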

python/paddle/fluid/executor.py

Lines changed: 3 additions & 1 deletion
@@ -14,7 +14,7 @@
 
 import numpy as np
 import contextlib
-from framework import Program, default_main_program
+from framework import Program, default_main_program, Variable
 from . import core
 
 __all__ = [
@@ -281,6 +281,8 @@ def run(self,
 
        if not has_fetch_operators(global_block, fetch_list, fetch_var_name):
            for i, var in enumerate(fetch_list):
+                assert isinstance(var, Variable) or isinstance(var, str), (
+                    "Wrong type for fetch_list[%s]: %s" % (i, type(var)))
                global_block.append_op(
                    type='fetch',
                    inputs={'X': [var]},
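
The added assert only makes bad fetch_list entries fail early with a readable message; a hedged usage sketch under the fluid API of this era:

import numpy
import paddle.fluid as fluid

a = fluid.layers.data(name="a", shape=[1])
b = a + 10
exe = fluid.Executor(fluid.CPUPlace())
a_np = numpy.random.random(size=[10, 1]).astype('float32')

# A Variable (or a variable name as a string) is accepted in fetch_list:
b_np, = exe.run(fluid.default_main_program(), feed={"a": a_np}, fetch_list=[b])

# Anything else, e.g. a numpy array, now trips the assert up front
# instead of failing later inside the fetch op:
# exe.run(fluid.default_main_program(), feed={"a": a_np}, fetch_list=[a_np])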

python/paddle/fluid/layers/math_op_patch.py

Lines changed: 26 additions & 7 deletions
@@ -1,11 +1,11 @@
 # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
-#
+#
 #     http://www.apache.org/licenses/LICENSE-2.0
-#
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -53,12 +53,22 @@ def create_tensor_with_batchsize(ref_var, value, dtype):
        value = float(value)
        tmp_name = unique_tmp_name()
        var = ref_var.block.create_var(name=tmp_name, dtype=dtype)
+        batch_dim = -1
+        for i, d in enumerate(ref_var.shape):
+            if d < 0:
+                batch_dim = i
+                break
+        assert batch_dim != -1
        ref_var.block.append_op(
            type='fill_constant_batch_size_like',
            outputs={'Out': [var]},
            inputs={'Input': [ref_var]},
-            attrs={'shape': ref_var.shape,
-                   'value': value})
+            attrs={
+                'shape': ref_var.shape,
+                'value': value,
+                'input_dim_idx': batch_dim,
+                'output_dim_idx': batch_dim
+            })
        return var
 
    def astype(self, dtype):
@@ -118,11 +128,20 @@ def __impl__(self, other_var):
            tmp_name = unique_tmp_name()
            out = self.block.create_var(name=tmp_name, dtype=lhs_dtype)
 
+            axis = -1
+            if other_var.shape[0] == -1:
+                axis = 0
+            assert len(self.shape) >= len(other_var.shape), (
+                "The rank of the first argument of a binary operator cannot "
+                "be smaller than the rank of its second argument: %s vs %s" %
+                (len(self.shape), len(other_var.shape)))
+
            self.block.append_op(
                type=op_type,
                inputs={'X': [self],
                        'Y': [other_var]},
-                outputs={'Out': out})
+                outputs={'Out': out},
+                attrs={'axis': axis})
            return out
 
        comment = OpProtoHolder.instance().get_op_proto(op_type).comment
@@ -131,7 +150,7 @@ def __impl__(self, other_var):
        {0}
        Args:
            self(Variable): left hand variable
-            other_var(Variable|float|int): right hand variable
+            other_var(Variable|float|int): right hand variable
 
        Returns:
            Variable
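
The patch makes two decisions in the operator overloads: locate the batch dimension (-1) so fill_constant_batch_size_like can copy it, and force axis=0 whenever the right-hand operand itself carries a batch dimension. The axis choice can be restated over static shapes; choose_axis below is illustrative, not part of the patched module:

def choose_axis(lhs_shape, rhs_shape):
    # fluid uses -1 for the batch dimension in a Variable's static shape.
    assert len(lhs_shape) >= len(rhs_shape), (
        "The rank of the first argument of a binary operator cannot "
        "be smaller than the rank of its second argument")
    return 0 if rhs_shape[0] == -1 else -1

print(choose_axis([-1, 2], [-1, 1]))    # 0: rhs has a batch dimension, align from axis 0
print(choose_axis([-1, 3, 4], [3, 4]))  # -1: the op infers axis = rank(X) - rank(Y)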

python/paddle/fluid/tests/unittests/test_elementwise_add_op.py

Lines changed: 24 additions & 0 deletions
@@ -50,6 +50,16 @@ def setUp(self):
        self.outputs = {'Out': self.inputs['X'] + self.inputs['Y']}
 
 
+class TestElementwiseAddOp_scalar2(TestElementwiseOp):
+    def setUp(self):
+        self.op_type = "elementwise_add"
+        self.inputs = {
+            'X': np.random.rand(2, 3, 4).astype(np.float32),
+            'Y': np.random.rand(1, 1).astype(np.float32)
+        }
+        self.outputs = {'Out': self.inputs['X'] + self.inputs['Y']}
+
+
 class TestElementwiseAddOp_Vector(TestElementwiseOp):
    def setUp(self):
        self.op_type = "elementwise_add"
@@ -115,6 +125,20 @@ def setUp(self):
        }
 
 
+class TestElementwiseAddOp_broadcast_4(TestElementwiseOp):
+    def setUp(self):
+        self.op_type = "elementwise_add"
+        self.inputs = {
+            'X': np.random.rand(2, 3, 4, 5).astype(np.float32),
+            'Y': np.random.rand(2, 1).astype(np.float32)
+        }
+
+        self.attrs = {'axis': 0}
+        self.outputs = {
+            'Out': self.inputs['X'] + self.inputs['Y'].reshape(2, 1, 1, 1)
+        }
+
+
 class TestElementwiseAddOp_rowwise_add_0(TestElementwiseOp):
    def setUp(self):
        self.op_type = "elementwise_add"
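
Both new cases reduce to ordinary numpy broadcasting, which the expected outputs rely on; a quick standalone check (numpy only, no Paddle op involved):

import numpy as np

x = np.random.rand(2, 3, 4).astype(np.float32)
y = np.random.rand(1, 1).astype(np.float32)       # all-singleton Y behaves as a scalar
assert np.allclose(x + y, x + float(y[0, 0]))

x4 = np.random.rand(2, 3, 4, 5).astype(np.float32)
y2 = np.random.rand(2, 1).astype(np.float32)      # (2, 1) at axis=0: trailing 1 ignored
assert (x4 + y2.reshape(2, 1, 1, 1)).shape == (2, 3, 4, 5)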

python/paddle/fluid/tests/unittests/test_math_op_patch.py

Lines changed: 14 additions & 6 deletions
@@ -1,11 +1,11 @@
 # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
-#
+#
 #     http://www.apache.org/licenses/LICENSE-2.0
-#
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -23,13 +23,21 @@ class TestMathOpPatches(unittest.TestCase):
    def test_add_scalar(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = a + 10
+        ab = fluid.layers.concat(input=[a, b], axis=1)
+        c = ab + 10
+        d = ab + a
+        # e = a + ab
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        a_np = numpy.random.random(size=[10, 1]).astype('float32')
-        b_np = exe.run(fluid.default_main_program(),
-                       feed={"a": a_np},
-                       fetch_list=[b])
+        b_np, c_np, d_np = exe.run(fluid.default_main_program(),
+                                   feed={"a": a_np},
+                                   fetch_list=[b, c, d])
        self.assertTrue(numpy.allclose(a_np + 10, b_np))
+        ab_np = numpy.concatenate([a_np, b_np], axis=1)
+        self.assertTrue(numpy.allclose(ab_np + 10, c_np))
+        d_expected = ab_np + numpy.concatenate([a_np, a_np], axis=1)
+        self.assertTrue(numpy.allclose(d_expected, d_np))
 
    @decorators.prog_scope()
    def test_radd_scalar(self):
