
Commit 569f7c4

enforce shape of backward target to be {1} (#5745)
* enforce shape of backward target to be {1}
* fix test_regularizer.py
* rm unused code
* fix backward_test
* fix a type bug
* fix test_program
1 parent 6cfcf62 commit 569f7c4
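
What this change means for callers: AppendBackward (and therefore optimizer.minimize and append_backward_ops) now rejects any backward target whose shape is not exactly {1}, failing the new PADDLE_ENFORCE with "target should be scalar". A loss with a larger shape has to be reduced to a shape-[1] variable first, which the updated tests do with a mean op. Below is a minimal sketch of the new pattern, modeled on the updated test_sgd_optimizer; the imports, the global_block() call, and the [5, 10] shape of mul.x are assumptions filled in from the surrounding test code rather than shown in this diff:

    import paddle.v2.fluid.optimizer as optimizer
    from paddle.v2.fluid.framework import Program

    init_program = Program()
    program = Program()
    block = program.global_block()

    mul_x = block.create_var(
        dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
    mul_y = block.create_var(
        dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
    mul_out = block.create_var(
        dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
    block.append_op(
        type="mul",
        inputs={"X": mul_x,
                "Y": mul_y},
        outputs={"Out": mul_out},
        attrs={"x_num_col_dims": 1})

    # New in this commit: reduce the [5, 8] output to a shape-[1] scalar,
    # because the backward target must now have shape {1}.
    mean_out = block.create_var(
        dtype="float32", shape=[1], lod_level=0, name="mean.out")
    block.append_op(
        type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})

    sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.01)
    # Passing mul_out here (the old pattern) would now trip the enforce.
    opts = sgd_optimizer.minimize(mean_out, init_program)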

5 files changed: 69 additions, 23 deletions

paddle/framework/backward.cc

Lines changed: 3 additions & 8 deletions
@@ -513,19 +513,14 @@ ParamGradInfoMap AppendBackward(
   const int root_block_idx = 0;
   auto root_block = program_desc.MutableBlock(root_block_idx);
 
-  // insert fill one op for target
-  // TODO(qiao) add some check to the target.
   std::string fill_one_op_out = GradVarName(target.Name());
-  std::vector<int64_t> target_shape_desc = target.Shape();
-  std::vector<int> target_shape;
-  std::transform(target_shape_desc.begin(), target_shape_desc.end(),
-                 std::back_inserter(target_shape),
-                 [](int64_t dim) { return static_cast<int>(dim); });
+  bool is_scalar = target.Shape() == std::vector<int64_t>{1};
+  PADDLE_ENFORCE(is_scalar, "target should be scalar");
   VLOG(3) << "backward from loss=" << target.Name()
           << " data_type=" << target.GetDataType();
   std::unique_ptr<OpDescBind> fill_one_op(
       new OpDescBind("fill_constant", {}, {{"Out", {fill_one_op_out}}},
-                     {{"shape", target_shape},
+                     {{"shape", std::vector<int>{1}},
                       {"value", static_cast<float>(1.0)},
                       {"data_type", target.GetDataType()}}));
   // infer var type of fill_one_op
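
With the target constrained to shape {1}, the fill_constant op that seeds the backward pass no longer needs to copy the target's shape: it always writes a single 1.0 (the gradient of the loss with respect to itself) in the target's data type. In NumPy terms, the seed gradient it produces for a float32 loss is roughly the following sketch:

    import numpy as np

    # Seed gradient for the loss variable: one element, value 1.0,
    # matching the hard-coded shape {1} and value 1.0 in fill_constant above.
    loss_grad_seed = np.full([1], 1.0, dtype=np.float32)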

paddle/framework/backward_test.cc

Lines changed: 7 additions & 0 deletions
@@ -508,6 +508,7 @@ TEST(Backward, simple_single_op) {
   op->SetOutput("Out", {"out"});
 
   auto target = f::VarDescBind("out");
+  target.SetShape({1});
   auto var_to_grad = AppendBackward(program, target, {});
 
   ASSERT_EQ(block->AllOps().size(), 3UL);
@@ -544,6 +545,7 @@ TEST(Backward, default_attribute) {
   op->CheckAttrs();
 
   auto target = f::VarDescBind("out");
+  target.SetShape({1});
   AppendBackward(program, target, {});
 
   ASSERT_EQ(block->AllOps().size(), 3UL);
@@ -581,6 +583,7 @@ TEST(Backward, simple_mult_op) {
   op3->SetOutput("Out", {"out3"});
 
   auto target = f::VarDescBind("out3");
+  target.SetShape({1});
   size_t forward_len = block->AllOps().size();
   auto var_to_grad = AppendBackward(program, target, {});
 
@@ -670,6 +673,7 @@ TEST(Backward, intermedia_var_no_grad) {
   op4->SetOutput("Out", {"out4"});
 
   auto target = f::VarDescBind("out4");
+  target.SetShape({1});
   size_t forward_len = block->AllOps().size();
   auto var_to_grad = AppendBackward(program, target, {"out3"});
 
@@ -730,6 +734,7 @@ TEST(Backward, var_no_grad) {
   op2->SetOutput("Z", {"z2"});
 
   auto target = f::VarDescBind("z2");
+  target.SetShape({1});
   size_t forward_len = block->AllOps().size();
   auto var_to_grad = AppendBackward(program, target, {"z1"});
 
@@ -810,6 +815,7 @@ TEST(Backward, shared_var) {
   op3->SetOutput("Out", {"out3"});
 
   auto target = f::VarDescBind("out3");
+  target.SetShape({1});
   size_t forward_len = block->AllOps().size();
   auto var_to_grad = AppendBackward(program, target, {});
 
@@ -888,6 +894,7 @@ TEST(Backward, half_backward) {
   op1->SetOutput("Out", {"out"});
 
   auto target = f::VarDescBind("out");
+  target.SetShape({1});
   size_t forward_len = block->AllOps().size();
   auto var_to_grad = AppendBackward(program, target, {"b"});
   f::OpDescBind *fill_op = block->AllOps()[forward_len];

python/paddle/v2/fluid/tests/test_optimizer.py

Lines changed: 40 additions & 8 deletions
@@ -16,14 +16,18 @@ def test_sgd_optimizer(self):
             dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
         mul_out = block.create_var(
             dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
+        mean_out = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="mean.out")
         block.append_op(
             type="mul",
             inputs={"X": mul_x,
                     "Y": mul_y},
             outputs={"Out": mul_out},
             attrs={"x_num_col_dims": 1})
+        block.append_op(
+            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
         sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.01)
-        opts = sgd_optimizer.minimize(mul_out, init_program)
+        opts = sgd_optimizer.minimize(mean_out, init_program)
         self.assertEqual(len(opts), 1)
         sgd_op = opts[0]
         self.assertEqual(sgd_op.type, "sgd")
@@ -44,12 +48,16 @@ def test_sgd_optimizer_with_global_step(self):
                     "Y": mul_y},
             outputs={"Out": mul_out},
             attrs={"x_num_col_dims": 1})
+        mean_out = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="mean.out")
+        block.append_op(
+            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
         global_step = block.create_var(
             dtype="float32", shape=[1], lod_level=0, name="step")
         learning_rate = 0.01
         sgd_optimizer = optimizer.SGDOptimizer(
             learning_rate=learning_rate, global_step=global_step)
-        opts = sgd_optimizer.minimize(mul_out, init_program)
+        opts = sgd_optimizer.minimize(mean_out, init_program)
         self.assertEqual(len(opts), 2)
         sgd_op = opts[0]
         self.assertEqual(sgd_op.type, "sgd")
@@ -90,7 +98,11 @@ def test_vanilla_momentum_optimizer(self):
         learning_rate = 0.01
         momentum_optimizer = self.MockMomentum(
             learning_rate=learning_rate, momentum=0.2)
-        params_grads = append_backward_ops(mul_out)
+        mean_out = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="mean.out")
+        block.append_op(
+            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
+        params_grads = append_backward_ops(mean_out)
         self.assertEqual(len(params_grads), 1)
         self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
         opts = momentum_optimizer.create_optimization_pass(
@@ -132,10 +144,14 @@ def test_nesterov_momentum_optimizer(self):
                     "Y": mul_y},
             outputs={"Out": mul_out},
             attrs={"x_num_col_dims": 1})
+        mean_out = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="mean.out")
+        block.append_op(
+            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
         learning_rate = 0.01
         momentum_optimizer = self.MockMomentum(
             learning_rate=learning_rate, momentum=0.2, use_nesterov=True)
-        params_grads = append_backward_ops(mul_out)
+        params_grads = append_backward_ops(mean_out)
         self.assertEqual(len(params_grads), 1)
         self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
         opts = momentum_optimizer.create_optimization_pass(
@@ -186,10 +202,14 @@ def test_adagrad_optimizer(self):
                     "Y": mul_y},
            outputs={"Out": mul_out},
             attrs={"x_num_col_dims": 1})
+        mean_out = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="mean.out")
+        block.append_op(
+            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
         learning_rate = 0.01
         adagrad_optimizer = self.MockAdagrad(
             learning_rate=learning_rate, epsilon=1.0e-6)
-        params_grads = append_backward_ops(mul_out)
+        params_grads = append_backward_ops(mean_out)
         self.assertEqual(len(params_grads), 1)
         self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0)
         opts = adagrad_optimizer.create_optimization_pass(params_grads, mul_out,
@@ -242,10 +262,14 @@ def test_adam_optimizer(self):
                     "Y": mul_y},
             outputs={"Out": mul_out},
             attrs={"x_num_col_dims": 1})
+        mean_out = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="mean.out")
+        block.append_op(
+            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
         learning_rate = 0.01
         adam_optimizer = self.MockAdam(
             learning_rate=learning_rate, beta1=0.9, beta2=0.999)
-        params_grads = append_backward_ops(mul_out)
+        params_grads = append_backward_ops(mean_out)
         self.assertEqual(len(params_grads), 1)
         self.assertEqual(len(adam_optimizer.get_accumulators()), 0)
         opts = adam_optimizer.create_optimization_pass(params_grads, mul_out,
@@ -300,10 +324,14 @@ def test_adamax_optimizer(self):
                     "Y": mul_y},
             outputs={"Out": mul_out},
             attrs={"x_num_col_dims": 1})
+        mean_out = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="mean.out")
+        block.append_op(
+            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
         learning_rate = 0.01
         adamax_optimizer = self.MockAdamax(
             learning_rate=learning_rate, beta1=0.9, beta2=0.999)
-        params_grads = append_backward_ops(mul_out)
+        params_grads = append_backward_ops(mean_out)
         self.assertEqual(len(params_grads), 1)
         self.assertEqual(len(adamax_optimizer.get_accumulators()), 0)
         opts = adamax_optimizer.create_optimization_pass(params_grads, mul_out,
@@ -355,10 +383,14 @@ def test_decayed_adagrad_optimizer(self):
                     "Y": mul_y},
             outputs={"Out": mul_out},
             attrs={"x_num_col_dims": 1})
+        mean_out = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="mean.out")
+        block.append_op(
+            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
         learning_rate = 0.01
         decayed_adagrad_optimizer = self.MockDecayedAdagrad(
             learning_rate=learning_rate, decay=0.95, epsilon=1.0e-6)
-        params_grads = append_backward_ops(mul_out)
+        params_grads = append_backward_ops(mean_out)
         self.assertEqual(len(params_grads), 1)
         self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0)
         opts = decayed_adagrad_optimizer.create_optimization_pass(
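
Every optimizer test above now repeats the same two-step setup: create a shape-[1] "mean.out" variable and append a mean op that reduces mul_out to it, so that the backward target satisfies the new scalar check. The repeated snippet is equivalent to a small helper like the following sketch (append_mean_op is hypothetical, not part of the codebase; it just names the pattern the tests inline):

    def append_mean_op(block, x, name="mean.out"):
        # Reduce a non-scalar output to the shape-[1] variable that
        # AppendBackward now requires as its target.
        mean_out = block.create_var(
            dtype="float32", shape=[1], lod_level=0, name=name)
        block.append_op(type="mean", inputs={"X": x}, outputs={"Out": mean_out})
        return mean_out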

python/paddle/v2/fluid/tests/test_program.py

Lines changed: 9 additions & 5 deletions
@@ -1,6 +1,5 @@
 import unittest
 
-import paddle.v2.fluid.core as core
 from paddle.v2.fluid.framework import Program
 from paddle.v2.fluid.framework import g_main_program
 
@@ -98,21 +97,26 @@ def test_append_backward(self):
                     "Y": add_y},
             outputs={"Out": add_out},
             attrs={"x_num_col_dims": 1})
+        mean_out = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="mean.out")
+        block.append_op(
+            type="mean", inputs={"X": add_out}, outputs={"Out": mean_out})
 
         self.assertEqual(mul_op.idx, 0)
         self.assertEqual(add_op.idx, 1)
-        param_to_grad = prog.append_backward(add_out, set())
+        param_to_grad = prog.append_backward(mean_out, set())
 
         def grad_name(name):
             return name + "@GRAD"
 
-        for var_name in ("mul.x", "mul.y", "mul.out", "add.y", "add.out"):
+        for var_name in ("mul.x", "mul.y", "mul.out", "add.y", "add.out",
+                         "mean.out"):
             self.assertEqual(param_to_grad[var_name][0], grad_name(var_name))
             self.assertEqual(param_to_grad[var_name][1], 0)
 
         expect_ops = [
-            "mul", "elementwise_add", "fill_constant", "elementwise_add_grad",
-            "mul_grad"
+            "mul", "elementwise_add", "mean", "fill_constant", "mean_grad",
+            "elementwise_add_grad", "mul_grad"
         ]
         actual_ops = []
         for op in block.ops:
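
The updated expect_ops list makes the resulting program layout explicit: forward ops come first, then the shape-[1] fill_constant that seeds the loss gradient, then the grad ops in reverse forward order. Annotated, the expected sequence for this test program is:

    expect_ops = [
        "mul", "elementwise_add", "mean",  # forward pass
        "fill_constant",                   # seeds mean.out@GRAD with a single 1.0
        "mean_grad", "elementwise_add_grad", "mul_grad",  # backward, reverse order
    ]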

python/paddle/v2/fluid/tests/test_regularizer.py

Lines changed: 10 additions & 2 deletions
@@ -29,7 +29,11 @@ def test_l2decay_regularizer(self):
                     "Y": mul_y},
             outputs={"Out": mul_out},
             attrs={"x_num_col_dims": 1})
-        params_grads = append_backward_ops(mul_out)
+        mean_out = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="mean.out")
+        block.append_op(
+            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
+        params_grads = append_backward_ops(mean_out)
         self.assertEqual(len(params_grads), 1)
         count_ops = len(block.ops)
         params_grads = optimizer.append_regularization_ops(params_grads)
@@ -62,7 +66,11 @@ def test_l2decay_regularizer(self):
                     "Y": mul_y},
             outputs={"Out": mul_out},
             attrs={"x_num_col_dims": 1})
-        params_grads = append_backward_ops(mul_out)
+        mean_out = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="mean.out")
+        block.append_op(
+            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
+        params_grads = append_backward_ops(mean_out)
         self.assertEqual(len(params_grads), 1)
         count_ops = len(block.ops)
         params_grads = optimizer.append_regularization_ops(params_grads)
