
Commit 9048229

[Cherry pick] Remove unnecessary op when trainable is false (#19434)
* fix optimizer bug test=develop
1 parent 5b3d33b commit 9048229

File tree

3 files changed: +103 additions, -6 deletions

python/paddle/fluid/backward.py

Lines changed: 1 addition & 2 deletions

@@ -712,8 +712,7 @@ def append_backward(loss, parameter_list=None, no_grad_set=None,
         parameters = parameter_list
     else:
         params = program.global_block().all_parameters()
-        program.global_block().iter_parameters()
-        parameters = [param.name for param in params]
+        parameters = [param.name for param in params if param.trainable]

     params_and_grads = []
     for param in parameters:
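
The effect of this change on append_backward can be seen with a small sketch. The snippet below is not part of the commit; it assumes the Paddle 1.x fluid API and mirrors the network used by the new unit test further down. A parameter created with trainable=False no longer appears in the returned (param, grad) list.

# Illustrative only: append_backward now skips frozen parameters.
import paddle.fluid as fluid

main, startup = fluid.Program(), fluid.Program()
with fluid.program_guard(main, startup):
    x = fluid.layers.data(name='image', shape=[784], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # The fc weight is frozen via ParamAttr(trainable=False); the bias stays trainable.
    feature = fluid.layers.fc(input=x,
                              size=10,
                              param_attr=fluid.ParamAttr(trainable=False))
    loss = fluid.layers.mean(
        fluid.layers.cross_entropy(input=feature, label=label))
    params_grads = fluid.backward.append_backward(loss)
    # With this patch, only trainable parameters show up in the result.
    assert all(p.trainable for p, g in params_grads)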

python/paddle/fluid/optimizer.py

Lines changed: 19 additions & 4 deletions

@@ -360,8 +360,9 @@ def _create_optimization_pass(self, parameters_and_grads):
         global_block = framework.default_main_program().global_block()
         start = len(global_block.ops)
         self.helper = LayerHelper(self.__class__.__name__)
-        self._create_accumulators(global_block,
-                                  [p[0] for p in parameters_and_grads])
+        self._create_accumulators(
+            global_block,
+            [p[0] for p in parameters_and_grads if p[0].trainable])
         self._create_global_learning_rate()

         optimize_ops = []
@@ -587,6 +588,20 @@ def minimize(self,
             tuple: (optimize_ops, params_grads) which are, list of operators appended;
             and list of (param, grad) Variables pair for optimization.
         """
+        assert isinstance(loss, Variable), "The loss should be a Variable."
+        if no_grad_set is None:
+            no_grad_set = set()
+        elif isinstance(no_grad_set, set) or isinstance(
+                no_grad_set, list) or isinstance(no_grad_set, tuple):
+            no_grad_set = set(no_grad_set)
+        else:
+            assert "no_grad_set should be a set, but the passed type is {}".format(
+                type(no_grad_set))
+        parameters = loss.block.program.global_block().all_parameters()
+        param_no_trainable = set(
+            [param.name for param in parameters if param.trainable is False])
+        # If the parameter is not trainable, it should not have a gradient.
+        no_grad_set.update(param_no_trainable)
         params_grads = self.backward(
             loss,
             startup_program=startup_program,
@@ -1390,7 +1405,7 @@ def _finish_update(self, block, param_and_grads):
         assert isinstance(block, framework.Block)
         main_block = block.program.global_block()
         for param, grad in param_and_grads:
-            if grad is None:
+            if grad is None or param.trainable is False:
                 continue
             with param.block.program._optimized_guard(
                 [param, grad]), name_scope("optimizer"):
@@ -1553,7 +1568,7 @@ def _finish_update(self, block, parameters_and_grads):
         assert isinstance(block, framework.Block)
         main_block = block.program.global_block()
         for param, grad in parameters_and_grads:
-            if grad is None:
+            if grad is None or param.trainable is False:
                 continue
             with param.block.program._optimized_guard(
                 [param, grad]), name_scope('adamx'):
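
The core of the fix lives in minimize(): every parameter flagged trainable=False is folded into no_grad_set before backward() runs, so no gradient or update op is emitted for it. The helper below is a plain-Python mirror of that normalization, not Paddle code; the name build_no_grad_set and the Param tuple are made up for illustration, and the commit's bare assert in the else branch is tightened here into an explicit raise.

from collections import namedtuple

# Stand-in for a framework Parameter; only the fields used here.
Param = namedtuple('Param', ['name', 'trainable'])

def build_no_grad_set(parameters, no_grad_set=None):
    """Mirror of the no_grad_set handling that minimize() now performs."""
    if no_grad_set is None:
        no_grad_set = set()
    elif isinstance(no_grad_set, (set, list, tuple)):
        no_grad_set = set(no_grad_set)
    else:
        raise TypeError("no_grad_set should be a set, but the passed type is {}"
                        .format(type(no_grad_set)))
    # A parameter that is not trainable should not receive a gradient.
    no_grad_set.update(p.name for p in parameters if p.trainable is False)
    return no_grad_set

params = [Param('fc_0.w_0', False), Param('fc_0.b_0', True)]
assert build_no_grad_set(params) == {'fc_0.w_0'}
assert build_no_grad_set(params, ['img']) == {'img', 'fc_0.w_0'}
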
Lines changed: 83 additions & 0 deletions

@@ -0,0 +1,83 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

from collections import Counter
import unittest
import paddle.fluid as fluid
from simple_nets import init_data


def test_trainable():
    x = fluid.layers.data(name='image', shape=[784], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    feature = fluid.layers.fc(input=x,
                              size=10,
                              param_attr=fluid.ParamAttr(trainable=False))
    loss = fluid.layers.cross_entropy(input=feature, label=label)
    loss = fluid.layers.mean(loss)
    return loss


class TestTrainable(unittest.TestCase):
    def check_trainable(self,
                        model,
                        feed_dict,
                        op_count,
                        optimizer=fluid.optimizer.Adam()):
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)

        main = fluid.Program()
        startup = fluid.Program()

        with fluid.program_guard(main, startup):
            loss = model()
            optimizer.minimize(loss)

            # The number of optimizer ops should match op_count.
            ops = Counter([op.type for op in main.global_block().ops])
            for op in op_count:
                if op_count[op] == 0:
                    assert op not in ops
                else:
                    assert ops[op] == op_count[op]

            exe.run(fluid.default_startup_program())
            exe.run(feed=feed_dict)

    def test_trainable(self):
        batch_size = 2
        img, label = init_data(batch_size, img_shape=[784], label_range=9)
        feed_dict = {'image': img, 'label': label}
        # Because the weight of the FC layer is not trainable and x is
        # stop_gradient, 'mul_grad' should not be appended.
        self.check_trainable(
            test_trainable,
            feed_dict,
            op_count={'adam': 1,
                      'scale': 2,
                      'mul_grad': 0})
        self.check_trainable(
            test_trainable,
            feed_dict,
            op_count={'adamax': 1,
                      'scale': 1,
                      'mul_grad': 0},
            optimizer=fluid.optimizer.Adamax(learning_rate=0.2))


if __name__ == '__main__':
    unittest.main()
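
The op-count check used by check_trainable above is a handy pattern on its own when verifying which ops an optimizer actually appended. A standalone sketch follows; op_types is a made-up stand-in for [op.type for op in main.global_block().ops].

from collections import Counter

# Hypothetical op list; in practice collect it from the built program.
op_types = ['mul', 'elementwise_add', 'mean', 'fill_constant',
            'mean_grad', 'elementwise_add_grad', 'adam']
ops = Counter(op_types)

expected = {'adam': 1, 'mul_grad': 0}
for op, count in expected.items():
    if count == 0:
        # The op must not have been appended at all.
        assert op not in ops
    else:
        assert ops[op] == count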
