
Commit 7e526a6

Merge pull request #9213 from wanghaoshuang/adadelta

Add python wrapper for Adadelta optimizer

2 parents: 381c6a0 + 89c9f79

File tree: 2 files changed, +93 -1 lines

doc/v2/api/fluid/optimizer.rst

Lines changed: 7 additions & 0 deletions

@@ -47,3 +47,10 @@ DecayedAdagrad
     :members:
     :noindex:

+Adadelta
+--------------
+
+.. autoclass:: paddle.fluid.optimizer.AdadeltaOptimizer
+    :members:
+    :noindex:
+

python/paddle/fluid/optimizer.py

Lines changed: 86 additions & 1 deletion
@@ -24,7 +24,9 @@
 from regularizer import append_regularization_ops
 from clip import append_gradient_clip_ops, error_clip_callback

-__all__ = ['SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad']
+__all__ = [
+    'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', 'Adadelta'
+]


 class Optimizer(object):
@@ -580,6 +582,88 @@ def _append_optimize_op(self, block, param_and_grad):
         return decayed_adagrad_op


+class AdadeltaOptimizer(Optimizer):
+    """
+    **Adadelta Optimizer**
+    Simple Adadelta optimizer with average squared grad state and
+    average squared update state.
+    The details of adadelta please refer to this
+    `ADADELTA: AN ADAPTIVE LEARNING RATE METHOD
+    <http://www.matthewzeiler.com/pubs/googleTR2012/googleTR2012.pdf>`_.
+
+    .. math::
+
+        E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2 \\\\
+        learning\\_rate &= sqrt( ( E(dx_{t-1}^2) + \\epsilon ) / ( \\
+                          E(g_t^2) + \\epsilon ) ) \\\\
+        E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\\_rate)^2
+
+    Args:
+        learning_rate(float): global learning rate
+        rho(float): rho in equation
+        epsilon(float): epsilon in equation
+
+    Examples:
+        .. code-block:: python
+
+            optimizer = fluid.optimizer.Adadelta(
+                learning_rate=0.0003, epsilon=1.0e-6, rho=0.95)
+            _, params_grads = optimizer.minimize(cost)
+    """
+
+    _avg_squared_grad_acc_str = "_avg_squared_grad"
+    _avg_squared_update_acc_str = "_avg_squared_update"
+
+    def __init__(self, learning_rate, epsilon=1.0e-6, rho=0.95, **kwargs):
+        if learning_rate is None:
+            raise ValueError("learning_rate is not set.")
+        if epsilon is None:
+            raise ValueError("epsilon is not set.")
+        if rho is None:
+            raise ValueError("rho is not set.")
+        super(AdadeltaOptimizer, self).__init__(
+            learning_rate=learning_rate, **kwargs)
+        self.type = "adadelta"
+        self._epsilon = epsilon
+        self._rho = rho
+
+    def _create_accumulators(self, block, parameters):
+        if not isinstance(block, framework.Block):
+            raise TypeError("block is not instance of framework.Block.")
+
+        for p in parameters:
+            self._add_accumulator(self._avg_squared_grad_acc_str, p)
+            self._add_accumulator(self._avg_squared_update_acc_str, p)
+
+    def _append_optimize_op(self, block, param_and_grad):
+        if not isinstance(block, framework.Block):
+            raise TypeError("block is not instance of framework.Block.")
+
+        avg_squared_grad_acc = self._get_accumulator(
+            self._avg_squared_grad_acc_str, param_and_grad[0])
+        avg_squared_update_acc = self._get_accumulator(
+            self._avg_squared_update_acc_str, param_and_grad[0])
+
+        # Create the adadelta optimizer op
+        adadelta_op = block.append_op(
+            type=self.type,
+            inputs={
+                "Param": param_and_grad[0],
+                "Grad": param_and_grad[1],
+                "AvgSquaredGrad": avg_squared_grad_acc,
+                "AvgSquaredUpdate": avg_squared_update_acc
+            },
+            outputs={
+                "ParamOut": param_and_grad[0],
+                "AvgSquaredGradOut": avg_squared_grad_acc,
+                "AvgSquaredUpdateOut": avg_squared_update_acc
+            },
+            attrs={"epsilon": self._epsilon,
+                   "rho": self._rho})
+
+        return adadelta_op
+
+
 # We short the class name, since users will use the optimizer with the package
 # name. The sample code:
 #
@@ -594,3 +678,4 @@ def _append_optimize_op(self, block, param_and_grad):
 Adam = AdamOptimizer
 Adamax = AdamaxOptimizer
 DecayedAdagrad = DecayedAdagradOptimizer
+Adadelta = AdadeltaOptimizer
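
For readers who want the docstring's math block in executable form, below is a minimal NumPy sketch of one Adadelta step following those equations. It is only an illustration: in Paddle the update is performed by the C++ "adadelta" operator that _append_optimize_op emits, and how the wrapper's global learning_rate feeds into that operator is not visible in this diff, so it is left out here. The names adadelta_step, g_acc, and u_acc are ad hoc for this sketch, not part of the fluid API.

import numpy as np

def adadelta_step(param, grad, avg_sq_grad, avg_sq_update,
                  rho=0.95, epsilon=1.0e-6):
    """One Adadelta update following the docstring equations (illustrative only)."""
    # E(g_t^2) = rho * E(g_{t-1}^2) + (1 - rho) * g^2
    avg_sq_grad = rho * avg_sq_grad + (1 - rho) * grad ** 2
    # per-element rate: sqrt((E(dx_{t-1}^2) + eps) / (E(g_t^2) + eps))
    rate = np.sqrt((avg_sq_update + epsilon) / (avg_sq_grad + epsilon))
    delta = -grad * rate
    # E(dx_t^2) = rho * E(dx_{t-1}^2) + (1 - rho) * dx^2
    avg_sq_update = rho * avg_sq_update + (1 - rho) * delta ** 2
    return param + delta, avg_sq_grad, avg_sq_update

# Toy run: minimize f(w) = 0.5 * ||w||^2, whose gradient is w itself.
w = np.array([1.0, -2.0, 3.0])
g_acc = np.zeros_like(w)   # analogue of the "_avg_squared_grad" accumulator
u_acc = np.zeros_like(w)   # analogue of the "_avg_squared_update" accumulator
for _ in range(1000):
    w, g_acc, u_acc = adadelta_step(w, w, g_acc, u_acc)
print(w)  # entries shrink toward zero

The Adadelta = AdadeltaOptimizer alias added at the bottom of the file is what makes the shorter fluid.optimizer.Adadelta(...) spelling in the docstring example work.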
