
Commit d7bf066

Adding interface for decayed adagrad optimizer (#5644)
* add decayed adagrad python code
* fix typo and order
* small fix
1 parent 1db1a0d commit d7bf066

File tree

2 files changed: +106 -4 lines changed


python/paddle/v2/fluid/optimizer.py

Lines changed: 51 additions & 3 deletions
@@ -9,7 +9,7 @@
 
 __all__ = [
     'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer',
-    'AdamaxOptimizer'
+    'AdamaxOptimizer', 'DecayedAdagradOptimizer'
 ]
 
 
@@ -85,7 +85,7 @@ def _add_accumulator(self, name, param, dtype=None, fill_value=0.0):
         """
         if (name in self._accumulators and
                 param.name in self._accumulators[name]):
-            raise Exception("Accumulator {} already exists for parmeter {}".
+            raise Exception("Accumulator {} already exists for parameter {}".
                             format(name, param.name))
 
         assert isinstance(self.helper, LayerHelper)
@@ -307,7 +307,7 @@ def _append_optimize_op(self, block, param_and_grad):
         moment_acc = self._get_accumulator(self._moment_acc_str,
                                            param_and_grad[0])
 
-        # create the adagrad optimizer op
+        # Create the adagrad optimizer op
         adagrad_op = block.append_op(
             type=self.type,
             inputs={
@@ -510,3 +510,51 @@ def _finish_update(self, block):
             attrs={"scale": self._beta1})
 
         return [scale_beta1]
+
+
+class DecayedAdagradOptimizer(Optimizer):
+    """Simple Decayed Adagrad optimizer with moment state
+    """
+    _moment_acc_str = "moment"
+
+    def __init__(self,
+                 learning_rate,
+                 decay=0.95,
+                 epsilon=1.0e-6,
+                 global_step=None):
+        assert learning_rate is not None
+        assert decay is not None
+        assert epsilon is not None
+
+        super(DecayedAdagradOptimizer, self).__init__(global_step)
+        self.type = "decayed_adagrad"
+        self._learning_rate = learning_rate
+        self._decay = decay
+        self._epsilon = epsilon
+
+    def _create_accumulators(self, block, parameters):
+        assert isinstance(block, framework.Block)
+
+        for p in parameters:
+            self._add_accumulator(self._moment_acc_str, p)
+
+    def _append_optimize_op(self, block, param_and_grad):
+        assert isinstance(block, framework.Block)
+
+        moment_acc = self._get_accumulator(self._moment_acc_str,
+                                           param_and_grad[0])
+
+        # Create the decayed adagrad optimizer op
+        decayed_adagrad_op = block.append_op(
+            type=self.type,
+            inputs={
+                "Param": param_and_grad[0],
+                "Grad": param_and_grad[1],
+                "Moment": moment_acc,
+                "LearningRate": self._create_param_lr(param_and_grad)
+            },
+            outputs={"ParamOut": param_and_grad[0],
+                     "MomentOut": moment_acc},
+            attrs={"epsilon": self._epsilon})
+
+        return decayed_adagrad_op
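
The wrapper above only adds graph-construction plumbing; the arithmetic is carried out by the existing decayed_adagrad operator, whose kernel is not part of this diff (note also that only "epsilon" is forwarded as an op attribute here, not the stored decay). As a rough reference, below is a minimal NumPy sketch of the update rule decayed Adagrad is generally described as applying; the function name and exact formula are assumptions, not taken from this commit.

import numpy as np

def decayed_adagrad_step(param, grad, moment, lr, decay=0.95, epsilon=1.0e-6):
    # Assumed semantics: unlike plain Adagrad, the squared-gradient
    # accumulator is an exponential moving average controlled by `decay`,
    # so old gradients fade instead of being summed forever.
    moment = decay * moment + (1.0 - decay) * np.square(grad)   # MomentOut
    param = param - lr * grad / (np.sqrt(moment) + epsilon)     # ParamOut
    return param, moment

# Tiny check with the defaults exposed by DecayedAdagradOptimizer.
p = np.zeros(3, dtype=np.float32)
m = np.zeros(3, dtype=np.float32)
g = np.array([0.1, -0.2, 0.3], dtype=np.float32)
p, m = decayed_adagrad_step(p, g, m, lr=0.01)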

python/paddle/v2/fluid/tests/test_optimizer.py

Lines changed: 55 additions & 1 deletion
@@ -198,7 +198,7 @@ def test_adagrad_optimizer(self):
         adagrad_op = opts[0]
         self.assertEqual(adagrad_op.type, "adagrad")
 
-        # check accumulators
+        # Check accumulators
         accumulators = adagrad_optimizer.get_accumulators()
         self.assertEqual(len(accumulators), 1)
         self.assertTrue(adagrad_optimizer.get_moment_str() in accumulators)
@@ -331,5 +331,59 @@ def test_adamax_optimizer(self):
         self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
 
 
+class TestDecayedAdagradOptimizer(unittest.TestCase):
+    class MockDecayedAdagrad(optimizer.DecayedAdagradOptimizer):
+        def get_accumulators(self):
+            return self._accumulators
+
+        def get_moment_str(self):
+            return self._moment_acc_str
+
+    def test_decayed_adagrad_optimizer(self):
+        init_program = framework.Program()
+        program = framework.Program()
+        block = program.global_block()
+        mul_x = block.create_parameter(
+            dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
+        mul_y = block.create_var(
+            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
+        mul_out = block.create_var(
+            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
+        block.append_op(
+            type="mul",
+            inputs={"X": mul_x,
+                    "Y": mul_y},
+            outputs={"Out": mul_out},
+            attrs={"x_num_col_dims": 1})
+        learning_rate = 0.01
+        decayed_adagrad_optimizer = self.MockDecayedAdagrad(
+            learning_rate=learning_rate, decay=0.95, epsilon=1.0e-6)
+        params_grads = append_backward_ops(mul_out)
+        self.assertEqual(len(params_grads), 1)
+        self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0)
+        opts = decayed_adagrad_optimizer.create_optimization_pass(
+            params_grads, mul_out, init_program)
+        self.assertEqual(len(opts), 1)
+        decayed_adagrad_op = opts[0]
+        self.assertEqual(decayed_adagrad_op.type, "decayed_adagrad")
+
+        # Check accumulators
+        accumulators = decayed_adagrad_optimizer.get_accumulators()
+        self.assertEqual(len(accumulators), 1)
+        self.assertTrue(
+            decayed_adagrad_optimizer.get_moment_str() in accumulators)
+        moment_acc = accumulators[decayed_adagrad_optimizer.get_moment_str()]
+        self.assertEqual(len(moment_acc), 1)
+        self.assertTrue(mul_x.name in moment_acc)
+
+        # Check init_program
+        init_ops = init_program.global_block().ops
+        self.assertEqual(len(init_ops), 2)
+        self.assertEqual(init_ops[0].type, "fill_constant")
+        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
+        self.assertEqual(init_ops[1].type, "fill_constant")
+        self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)
+
+
 if __name__ == '__main__':
     unittest.main()
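
For completeness, here is a hedged usage sketch of the new optimizer outside the Mock subclass, following the same append_backward_ops / create_optimization_pass path the test exercises; the import paths mirror the test module, and the toy program (a single mul op with arbitrary shapes and names) is purely illustrative.

import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.fluid.backward import append_backward_ops

init_program = framework.Program()
program = framework.Program()
block = program.global_block()

# A single mul op standing in for a real network (shapes are arbitrary).
x = block.create_parameter(dtype="float32", shape=[5, 10], lod_level=0, name="x")
y = block.create_var(dtype="float32", shape=[10, 8], lod_level=0, name="y")
out = block.create_var(dtype="float32", shape=[5, 8], lod_level=0, name="out")
block.append_op(
    type="mul",
    inputs={"X": x,
            "Y": y},
    outputs={"Out": out},
    attrs={"x_num_col_dims": 1})

opt = optimizer.DecayedAdagradOptimizer(
    learning_rate=0.01, decay=0.95, epsilon=1.0e-6)

# Build gradients, then append the decayed_adagrad ops plus the
# accumulator / learning-rate initializers into init_program.
params_grads = append_backward_ops(out)
opts = opt.create_optimization_pass(params_grads, out, init_program)

In ordinary training code the base Optimizer's minimize() wrapper would presumably be used instead of calling create_optimization_pass directly, but that entry point is not shown in this diff.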
