Skip to content

Commit 35483a2

Browse files
authored
Add neural transformer learning rate decay function. (#9951)
Add neural transformer learning rate decay function
1 parent fbe5624 commit 35483a2

File tree

1 file changed

+30
-3
lines changed

1 file changed

+30
-3
lines changed

python/paddle/fluid/layers/learning_rate_scheduler.py

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
# Public API of this module: the supported learning-rate decay schedules.
__all__ = [
    'exponential_decay',
    'natural_exp_decay',
    'inverse_time_decay',
    'polynomial_decay',
    'piecewise_decay',
    'noam_decay',
]
2525
"""
2626
When training a model, it's often useful to decay the
@@ -32,14 +32,41 @@
3232
"""
3333

3434

def _decay_step_counter(begin=0):
    """Create the shared learning-rate decay step counter as float32.

    Args:
        begin(int): Value the counter starts from; the first global step
            observed by a decay schedule equals `begin`.

    Returns:
        A float32 Variable holding the auto-incremented global step.
    """
    # The counter advances by 1 each time it is evaluated.
    counter = nn.autoincreased_step_counter(
        counter_name='@LR_DECAY_COUNTER@', begin=begin, step=1)
    return tensor.cast(counter, 'float32')
4141

4242

43+
def noam_decay(d_model, warmup_steps):
    """Apply Noam learning-rate decay to the learning rate.

    The learning rate produced for the current step is:

    ```python
    lr_value = np.power(d_model, -0.5) * np.min([
        np.power(current_steps, -0.5),
        np.power(warmup_steps, -1.5) * current_steps
    ])
    ```

    i.e. it increases linearly for the first `warmup_steps` training steps
    and then decays proportionally to the inverse square root of the step.

    Args:
        d_model(Variable): The dimensionality of input and output of model.
        warmup_steps(Variable): The number of warmup steps used to scale
            the linear ramp-up phase.

    Reference:
        "Attention Is All You Need", https://arxiv.org/pdf/1706.03762.pdf

    Returns:
        The decayed learning rate.
    """
    # Start the counter at 1 so global_step**-0.5 is well defined on the
    # very first step (0**-0.5 would be infinite).
    global_step = _decay_step_counter(1)

    with init_on_cpu():
        a = global_step**-0.5
        b = (warmup_steps**-1.5) * global_step
        lr_value = (d_model**-0.5) * ops.elementwise_min(a, b)

    return lr_value
68+
69+
4370
def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
4471
"""Applies exponential decay to the learning rate.
4572

0 commit comments

Comments
 (0)