Commit d3fbede

Merge pull request #8564 from reyoung/feature/add_global_step
Add global_step in nn.py
2 parents db77006 + 2a4e7ad commit d3fbede

File tree: 7 files changed, +113 -147 lines changed

python/paddle/fluid/layer_helper.py

Lines changed: 19 additions & 0 deletions

@@ -330,9 +330,28 @@ def create_variable(self, *args, **kwargs):
         return self.main_program.current_block().create_var(*args, **kwargs)
 
     def create_global_variable(self, persistable=False, *args, **kwargs):
+        """
+        create global variable, note that there is no initializer for this global variable.
+        Args:
+            persistable(bool): True if it is a checkpoint value.
+            *args: See create_var's documentation
+            **kwargs: See create_var's documentation
+
+        Returns(Variable): the created variable.
+        """
         return self.main_program.global_block().create_var(
             *args, persistable=persistable, **kwargs)
 
+    def create_or_get_global_variable(self, name, *args, **kwargs):
+        """
+        Creates a global variable if not exists and returns the variable and
+        a boolean flag which is true when it is a new variable.
+        """
+        if self.main_program.global_block().has_var(name):
+            return self.main_program.global_block().var(name), False
+        else:
+            return self.create_global_variable(name=name, *args, **kwargs), True
+
     def set_variable_initializer(self, var, initializer):
         assert isinstance(var, Variable)
         self.startup_program.global_block().create_var(
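
Below is a minimal usage sketch, not part of the diff, of what create_or_get_global_variable guarantees: asking for the same name from two different helpers yields one shared global variable, and the boolean flag is true only on the call that actually created it. The helper names and the '@DEMO_COUNTER@' variable are illustrative assumptions.

from paddle.fluid.layer_helper import LayerHelper

helper_a = LayerHelper('demo_a')
helper_b = LayerHelper('demo_b')

# First request: the variable does not exist yet, so it is created.
counter, is_new = helper_a.create_or_get_global_variable(
    name='@DEMO_COUNTER@', dtype='int64', shape=[1], persistable=True)
assert is_new

# Second request for the same name: the existing variable is returned instead.
same_counter, is_new = helper_b.create_or_get_global_variable(
    name='@DEMO_COUNTER@', dtype='int64', shape=[1], persistable=True)
assert not is_new
assert same_counter.name == counter.name

This is how autoincreased_step_counter below can be called more than once with the same counter_name without creating a second counter variable or a second increment op.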

python/paddle/fluid/layers/nn.py

Lines changed: 32 additions & 0 deletions

@@ -70,6 +70,7 @@
     'softmax_with_cross_entropy',
     'smooth_l1',
     'one_hot',
+    'autoincreased_step_counter',
 ]
 
 
@@ -3236,3 +3237,34 @@ def one_hot(input, depth):
         attrs={'depth': depth},
         outputs={'Out': one_hot_out})
     return one_hot_out
+
+
+def autoincreased_step_counter(counter_name=None, begin=1, step=1):
+    """
+    NOTE: The counter will be automatically increased by 1 every mini-batch
+    Return the run counter of the main program, which is started with 1.
+
+    Args:
+        counter_name(str): The counter name, default is '@STEP_COUNTER@'.
+        begin(int): The first value of this counter.
+        step(int): The increment step between each execution.
+
+    Returns(Variable): The global run counter.
+    """
+    helper = LayerHelper('global_step_counter')
+    if counter_name is None:
+        counter_name = '@STEP_COUNTER@'
+    counter, is_new_var = helper.create_or_get_global_variable(
+        name=counter_name, dtype='int64', shape=[1], persistable=True)
+    if is_new_var:
+        helper.set_variable_initializer(
+            counter, initializer=Constant(
+                value=begin - 1, force_cpu=True))
+        helper.main_program.global_block().prepend_op(
+            type='increment',
+            inputs={'X': [counter]},
+            outputs={'Out': [counter]},
+            attrs={'step': float(step)})
+        counter.stop_gradient = True
+
+    return counter
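
A minimal sketch of how the new layer behaves at run time; this example is not part of the commit, and the executor and fetch details are assumptions about the surrounding fluid API rather than something this diff touches. Because the increment op is prepended to the main program and the counter is initialized to begin - 1 by the startup program, the fetched value should advance by step on every executor run, starting at begin.

import paddle.fluid as fluid

step = fluid.layers.autoincreased_step_counter()  # default '@STEP_COUNTER@', begin=1, step=1

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())  # sets the counter to begin - 1 == 0

for _ in range(3):
    value, = exe.run(fluid.default_main_program(), fetch_list=[step])
    print(value)  # expected to print 1, 2, 3 (as one-element int64 arrays)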

python/paddle/fluid/learning_rate_decay.py

Lines changed: 44 additions & 65 deletions

@@ -13,7 +13,6 @@
 # limitations under the License.
 
 import layers
-from framework import Variable
 from initializer import init_on_cpu
 
 __all__ = [
@@ -30,11 +29,15 @@
 """
 
 
-def exponential_decay(learning_rate,
-                      global_step,
-                      decay_steps,
-                      decay_rate,
-                      staircase=False):
+def _decay_step_counter():
+    # the first global step is zero in learning rate decay
+    global_step = layers.autoincreased_step_counter(
+        counter_name='@LR_DECAY_COUNTER@', begin=0, step=1)
+    global_step = layers.cast(global_step, 'float32')
+    return global_step
+
+
+def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
     """Applies exponential decay to the learning rate.
 
     ```python
@@ -44,16 +47,14 @@ def exponential_decay(learning_rate,
     Args:
         learning_rate: A scalar float32 value or a Variable. This
                        will be the initial learning rate during training
-        global_step: A Variable that record the training step.
         decay_steps: A Python `int32` number.
         decay_rate: A Python `float` number.
         staircase: Boolean. If set true, decay the learning rate every decay_steps.
 
     Returns:
         The decayed learning rate
     """
-    if not isinstance(global_step, Variable):
-        raise ValueError("global_step is required for exponential_decay.")
+    global_step = _decay_step_counter()
 
     with init_on_cpu():
         # update learning_rate
@@ -65,32 +66,25 @@ def exponential_decay(learning_rate,
     return decayed_lr
 
 
-def natural_exp_decay(learning_rate,
-                      global_step,
-                      decay_steps,
-                      decay_rate,
-                      staircase=False):
+def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
     """Applies natural exponential decay to the initial learning rate.
 
-    ```python
-    if not staircase:
-        decayed_learning_rate = learning_rate * exp(- decay_rate * (global_step / decay_steps))
-    else:
-        decayed_learning_rate = learning_rate * exp(- decay_rate * (global_step / decay_steps))
-    ```
+    >>> if not staircase:
+    >>>     decayed_learning_rate = learning_rate * exp(- decay_rate * (global_step / decay_steps))
+    >>> else:
+    >>>     decayed_learning_rate = learning_rate * exp(- decay_rate * (global_step / decay_steps))
+
     Args:
         learning_rate: A scalar float32 value or a Variable. This
                        will be the initial learning rate during training
-        global_step: A Variable that record the training step.
         decay_steps: A Python `int32` number.
         decay_rate: A Python `float` number.
         staircase: Boolean. If set true, decay the learning rate every decay_steps.
 
     Returns:
         The decayed learning rate
     """
-    if not isinstance(global_step, Variable):
-        raise ValueError("global_step is required for natural_exp_decay.")
+    global_step = _decay_step_counter()
 
     with init_on_cpu():
         div_res = global_step / decay_steps
@@ -101,32 +95,25 @@ def natural_exp_decay(learning_rate,
     return decayed_lr
 
 
-def inverse_time_decay(learning_rate,
-                       global_step,
-                       decay_steps,
-                       decay_rate,
-                       staircase=False):
+def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
     """Applies inverse time decay to the initial learning rate.
 
-    ```python
-    if staircase:
-        decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step))
-    else:
-        decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step)
-    ```
+    >>> if staircase:
+    >>>     decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step))
+    >>> else:
+    >>>     decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step)
+
     Args:
         learning_rate: A scalar float32 value or a Variable. This
-                       will be the initial learning rate during training
-        global_step: A Variable that record the training step.
+                       will be the initial learning rate during training.
         decay_steps: A Python `int32` number.
         decay_rate: A Python `float` number.
         staircase: Boolean. If set true, decay the learning rate every decay_steps.
 
     Returns:
         The decayed learning rate
     """
-    if not isinstance(global_step, Variable):
-        raise ValueError("global_step is required for inverse_time_decay.")
+    global_step = _decay_step_counter()
 
     with init_on_cpu():
         div_res = global_step / decay_steps
@@ -139,26 +126,22 @@ def inverse_time_decay(learning_rate,
 
 
 def polynomial_decay(learning_rate,
-                     global_step,
                      decay_steps,
                      end_learning_rate=0.0001,
                      power=1.0,
                      cycle=False):
     """Applies polynomial decay to the initial learning rate.
 
-    ```python
-    if cycle:
-        decay_steps = decay_steps * ceil(global_step / decay_steps)
-    else:
-        global_step = min(global_step, decay_steps)
-    decayed_learning_rate = (learning_rate - end_learning_rate) *
-                            (1 - global_step / decay_steps) ^ power +
-                            end_learning_rate
-    ```
+    >>> if cycle:
+    >>>     decay_steps = decay_steps * ceil(global_step / decay_steps)
+    >>> else:
+    >>>     global_step = min(global_step, decay_steps)
+    >>> decayed_learning_rate = (learning_rate - end_learning_rate) *
+    >>>                         (1 - global_step / decay_steps) ^ power +
+    >>>                         end_learning_rate
     Args:
         learning_rate: A scalar float32 value or a Variable. This
                        will be the initial learning rate during training
-        global_step: A Variable that record the training step.
         decay_steps: A Python `int32` number.
         end_learning_rate: A Python `float` number.
         power: A Python `float` number
@@ -167,8 +150,7 @@ def polynomial_decay(learning_rate,
     Returns:
         The decayed learning rate
     """
-    if not isinstance(global_step, Variable):
-        raise ValueError("global_step is required for inverse_time_decay.")
+    global_step = _decay_step_counter()
 
     with init_on_cpu():
         if cycle:
@@ -193,27 +175,24 @@ def polynomial_decay(learning_rate,
     return decayed_lr
 
 
-def piecewise_decay(global_step, boundaries, values):
+def piecewise_decay(boundaries, values):
     """Applies piecewise decay to the initial learning rate.
 
-    ```python
-    boundaries = [10000, 20000]
-    values = [1.0, 0.5, 0.1]
-
-    if step < 10000:
-        learning_rate = 1.0
-    elif step >= 10000 and step < 20000:
-        learning_rate = 0.5
-    else:
-        learning_rate = 0.1
-    ```
+    >>> boundaries = [10000, 20000]
+    >>> values = [1.0, 0.5, 0.1]
+    >>>
+    >>> if step < 10000:
+    >>>     learning_rate = 1.0
+    >>> elif 10000 <= step < 20000:
+    >>>     learning_rate = 0.5
+    >>> else:
+    >>>     learning_rate = 0.1
     """
 
     if len(values) - len(boundaries) != 1:
         raise ValueError("len(values) - len(boundaries) should be 1")
 
-    if not isinstance(global_step, Variable):
-        raise ValueError("global_step is required for piecewise_decay.")
+    global_step = _decay_step_counter()
 
     with init_on_cpu():
         lr = layers.create_global_var(
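
For code that calls these helpers, the migration is mechanical: drop the global_step argument and let the decay function drive its own '@LR_DECAY_COUNTER@' counter internally. A hedged before/after sketch (the concrete learning-rate and step values are placeholders, not from this PR):

import paddle.fluid as fluid

# Before this commit the caller created and threaded a global step variable:
#   global_step = fluid.layers.create_global_var(
#       shape=[1], value=0, dtype='float32', force_cpu=True, persistable=True)
#   lr = fluid.learning_rate_decay.exponential_decay(
#       learning_rate=0.01, global_step=global_step,
#       decay_steps=10000, decay_rate=0.5, staircase=True)

# After this commit the counter is created internally by the decay helper:
lr = fluid.learning_rate_decay.exponential_decay(
    learning_rate=0.01, decay_steps=10000, decay_rate=0.5, staircase=True)
sgd = fluid.optimizer.SGD(learning_rate=lr)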

python/paddle/fluid/optimizer.py

Lines changed: 1 addition & 24 deletions

@@ -35,11 +35,10 @@ class Optimizer(object):
     but need to use one of it's implementation.
     """
 
-    def __init__(self, learning_rate, global_step=None, regularization=None):
+    def __init__(self, learning_rate, regularization=None):
         if not isinstance(learning_rate, float) and \
                 not isinstance(learning_rate, framework.Variable):
             raise TypeError("learning rate should be float or Variable")
-        self._global_step = global_step
         self.regularization = regularization
         self._learning_rate = learning_rate
         # each program should have a independent learning rate
@@ -159,26 +158,6 @@ def _get_accumulator(self, name, param):
                             format(name, param.name))
         return self._accumulators[name][param.name]
 
-    def _increment_global_step(self, block):
-        """Increment the global step by 1 after every iteration
-
-        Args:
-            block: the block in which the loss variable is present
-
-        Returns:
-            list with global_step increment op as its only element
-        """
-        assert isinstance(block, framework.Block)
-        assert self._global_step is not None
-        # create the increment op
-        increment_op = block.append_op(
-            type="increment",
-            inputs={"X": self._global_step},
-            outputs={"Out": self._global_step},
-            attrs={"step": 1.0})
-
-        return increment_op
-
     def create_optimization_pass(self,
                                  parameters_and_grads,
                                  loss,
@@ -225,8 +204,6 @@ def create_optimization_pass(self,
         # FIXME: Need to fix this once we figure out how to handle dependencies
         self._finish_update(loss.block)
 
-        if self._global_step is not None:
-            self._increment_global_step(loss.block)
         end = len(global_block.ops)
         return global_block.slice_ops(start, end)
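
Callers of the Optimizer constructors likewise drop the removed global_step keyword; a program that still wants a step counter can create one with the new layer instead of handing it to the optimizer. A short sketch under that assumption (the learning-rate value is a placeholder):

import paddle.fluid as fluid

# Before: sgd = fluid.optimizer.SGD(learning_rate=0.001, global_step=global_step)
# After:  the optimizer no longer accepts or increments a global step.
sgd = fluid.optimizer.SGD(learning_rate=0.001)

# Optional: an explicit counter, now provided by the layer rather than the optimizer.
step = fluid.layers.autoincreased_step_counter()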

python/paddle/fluid/tests/book/test_label_semantic_roles.py

Lines changed: 1 addition & 5 deletions

@@ -169,16 +169,12 @@ def train(use_cuda, save_dirname=None, is_local=True):
 
     # TODO(qiao)
     # check other optimizers and check why out will be NAN
-    global_step = fluid.layers.create_global_var(
-        shape=[1], value=0, dtype='float32', force_cpu=True, persistable=True)
     sgd_optimizer = fluid.optimizer.SGD(
         learning_rate=fluid.learning_rate_decay.exponential_decay(
             learning_rate=0.0001,
-            global_step=global_step,
             decay_steps=100000,
             decay_rate=0.5,
-            staircase=True),
-        global_step=global_step)
+            staircase=True))
     optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)
 
     # TODO(qiao)
