Commit 79c5a46

Handling global step increment in optimizer python wrapper (#5097)

* Adding the increment op for global step
* Changing list to single op as per code review feedback

1 parent 6783dce, commit 79c5a46

2 files changed: 65 additions, 10 deletions
python/paddle/v2/framework/optimizer.py

Lines changed: 39 additions & 10 deletions
@@ -18,7 +18,8 @@ class Optimizer(object):
     but need to use one of it's implementation.
     """
 
-    def __init__(self):
+    def __init__(self, global_step=None):
+        self._global_step = global_step
         # Dictionary of accumulators. Some optimizer subclasses need to
         # allocate and manage extra variables associated with the parameters
         # to train. These variables are called accumulators.
@@ -109,6 +110,26 @@ def _get_accumulator(self, name, param):
                 format(name, param.name))
         return self._accumulators[name][param.name]
 
+    def _increment_global_step(self, block):
+        """Increment the global step by 1 after every iteration
+
+        Args:
+            block: the block in which the loss variable is present
+
+        Returns:
+            list with global_step increment op as its only element
+        """
+        assert isinstance(block, framework.Block)
+        assert self._global_step is not None
+        # create the increment op
+        increment_op = block.append_op(
+            type="increment",
+            inputs={"X": self._global_step},
+            outputs={"Out": self._global_step},
+            attrs={"step": 1.0})
+
+        return increment_op
+
     def create_optimization_pass(self, parameters_and_grads, loss):
         """Add optimization operators to update gradients to variables.
 
@@ -152,6 +173,8 @@ def create_optimization_pass(self, parameters_and_grads, loss):
         if finish_ops is not None:
             return_ops += finish_ops
 
+        if self._global_step is not None:
+            return_ops.append(self._increment_global_step(loss.block))
         return return_ops
 
     def minimize(self, loss, parameter_list=None, no_grad_set=None):
@@ -172,9 +195,9 @@ class SGDOptimizer(Optimizer):
     """ Simple SGD optimizer without any state.
     """
 
-    def __init__(self, learning_rate):
+    def __init__(self, learning_rate, global_step=None):
         assert learning_rate is not None
-        super(SGDOptimizer, self).__init__()
+        super(SGDOptimizer, self).__init__(global_step)
         self.type = "sgd"
         self._learning_rate = learning_rate
 
@@ -215,10 +238,14 @@ class MomentumOptimizer(Optimizer):
     """
     _velocity_acc_str = "velocity"
 
-    def __init__(self, learning_rate, momentum, use_nesterov=False):
+    def __init__(self,
+                 learning_rate,
+                 momentum,
+                 use_nesterov=False,
+                 global_step=None):
         assert learning_rate is not None
         assert momentum is not None
-        super(MomentumOptimizer, self).__init__()
+        super(MomentumOptimizer, self).__init__(global_step)
         self.type = "momentum"
         self._learning_rate = learning_rate
         self._momentum = momentum
@@ -275,10 +302,10 @@ class AdagradOptimizer(Optimizer):
     """
     _moment_acc_str = "moment"
 
-    def __init__(self, learning_rate, epsilon=1.0e-6):
+    def __init__(self, learning_rate, epsilon=1.0e-6, global_step=None):
         assert learning_rate is not None
         assert epsilon is not None
-        super(AdagradOptimizer, self).__init__()
+        super(AdagradOptimizer, self).__init__(global_step)
         self.type = "adagrad"
         self._learning_rate = learning_rate
         self._epsilon = epsilon
@@ -337,12 +364,13 @@ def __init__(self,
                  learning_rate=0.001,
                  beta1=0.9,
                  beta2=0.999,
-                 epsilon=1e-8):
+                 epsilon=1e-8,
+                 global_step=None):
         assert learning_rate is not None
         assert beta1 is not None
         assert beta2 is not None
         assert epsilon is not None
-        super(AdamOptimizer, self).__init__()
+        super(AdamOptimizer, self).__init__(global_step)
         self.type = "adam"
         self._learning_rate = learning_rate
         self._beta1 = beta1
@@ -458,7 +486,8 @@ def __init__(self,
                  learning_rate=0.001,
                  beta1=0.9,
                  beta2=0.999,
-                 epsilon=1e-8):
+                 epsilon=1e-8,
+                 global_step=None):
         assert learning_rate is not None
         assert beta1 is not None
         assert beta2 is not None
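
From the caller's side, the change means an optimizer can be handed a step-counter variable at construction time, and minimize() will also return an op that bumps that counter once per iteration. Below is a minimal usage sketch; the import paths and variable names are assumptions based on this diff and the test file that follows, not part of the commit itself.

# Minimal usage sketch (illustrative; assumes the v2 framework API at this commit).
import paddle.v2.framework.framework as framework
import paddle.v2.framework.optimizer as optimizer

program = framework.Program()
block = program.global_block()

# One-element float variable that the appended "increment" op advances by 1.0
# at the end of every optimization pass.
global_step = block.create_var(
    dtype="float32", shape=[1], lod_level=0, name="step")

# Every optimizer subclass now forwards global_step to Optimizer.__init__.
sgd = optimizer.SGDOptimizer(learning_rate=0.01, global_step=global_step)

# minimize() returns the usual optimization ops plus the increment op:
# opts = sgd.minimize(cost)  # `cost` would be a loss variable built in this block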

python/paddle/v2/framework/tests/test_optimizer.py

Lines changed: 26 additions & 0 deletions
@@ -27,6 +27,32 @@ def test_sgd_optimizer(self):
         sgd_op = opts[0]
         self.assertEqual(sgd_op.type, "sgd")
 
+    def test_sgd_optimizer_with_global_step(self):
+        program = framework.Program()
+        block = program.global_block()
+        mul_x = block.create_parameter(
+            dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
+        mul_y = block.create_var(
+            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
+        mul_out = block.create_var(
+            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
+        block.append_op(
+            type="mul",
+            inputs={"X": mul_x,
+                    "Y": mul_y},
+            outputs={"Out": mul_out},
+            attrs={"x_num_col_dims": 1})
+        global_step = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="step")
+        sgd_optimizer = optimizer.SGDOptimizer(
+            learning_rate=0.01, global_step=global_step)
+        opts = sgd_optimizer.minimize(mul_out)
+        self.assertEqual(len(opts), 2)
+        sgd_op = opts[0]
+        self.assertEqual(sgd_op.type, "sgd")
+        increment_op = opts[1]
+        self.assertEqual(increment_op.type, "increment")
+
 
 class TestMomentumOptimizer(unittest.TestCase):
     class MockMomentum(optimizer.MomentumOptimizer):
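
The test above relies on the fact that the op appended by _increment_global_step reads and writes the same variable: inputs {"X": global_step}, outputs {"Out": global_step}, with attr step=1.0, so the step variable is rewritten in place as Out = X + step. A conceptual NumPy sketch of that behaviour (not the actual increment kernel) is:

# Conceptual sketch of the "increment" op's effect (Out = X + step, step = 1.0).
import numpy as np

step_attr = 1.0
global_step_value = np.array([0.0], dtype="float32")

for _ in range(3):  # three training iterations
    # ... the sgd (or momentum/adagrad/adam) ops would run here ...
    global_step_value = global_step_value + step_attr  # what the increment op does

print(global_step_value)  # -> [3.]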
