@@ -24,7 +24,9 @@
from regularizer import append_regularization_ops
from clip import append_gradient_clip_ops, error_clip_callback

-__all__ = ['SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad']
+__all__ = [
+    'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', 'Adadelta'
+]


class Optimizer(object):
@@ -575,6 +577,58 @@ def _append_optimize_op(self, block, param_and_grad):
        return decayed_adagrad_op


+class AdadeltaOptimizer(Optimizer):
+    """Simple Adadelta optimizer with average squared grad state and
+    average squared update state.
+    """
+    _avg_squared_grad_acc_str = "_avg_squared_grad"
+    _avg_squared_update_acc_str = "_avg_squared_update"
+
+    def __init__(self, learning_rate, epsilon=1.0e-6, rho=0.95, **kwargs):
+        assert learning_rate is not None
+        assert epsilon is not None
+        assert rho is not None
+        super(AdadeltaOptimizer, self).__init__(
+            learning_rate=learning_rate, **kwargs)
+        self.type = "adadelta"
+        self._epsilon = epsilon
+        self._rho = rho
+
+    def _create_accumulators(self, block, parameters):
+        assert isinstance(block, framework.Block)
+
+        for p in parameters:
+            self._add_accumulator(self._avg_squared_grad_acc_str, p)
+            self._add_accumulator(self._avg_squared_update_acc_str, p)
+
+    def _append_optimize_op(self, block, param_and_grad):
+        assert isinstance(block, framework.Block)
+
+        avg_squared_grad_acc = self._get_accumulator(
+            self._avg_squared_grad_acc_str, param_and_grad[0])
+        avg_squared_update_acc = self._get_accumulator(
+            self._avg_squared_update_acc_str, param_and_grad[0])
+
+        # Create the adadelta optimizer op
+        adadelta_op = block.append_op(
+            type=self.type,
+            inputs={
+                "Param": param_and_grad[0],
+                "Grad": param_and_grad[1],
+                "AvgSquaredGrad": avg_squared_grad_acc,
+                "AvgSquaredUpdate": avg_squared_update_acc
+            },
+            outputs={
+                "ParamOut": param_and_grad[0],
+                "AvgSquaredGradOut": avg_squared_grad_acc,
+                "AvgSquaredUpdateOut": avg_squared_update_acc
+            },
+            attrs={"epsilon": self._epsilon,
+                   "rho": self._rho})
+
+        return adadelta_op
+
+
# We short the class name, since users will use the optimizer with the package
# name. The sample code:
#
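The `adadelta` operator appended by `_append_optimize_op` above is expected to apply the standard Adadelta rule (Zeiler, 2012): it keeps a decaying average of squared gradients and of squared parameter updates, with `rho` as the decay rate and `epsilon` for numerical stability. The NumPy sketch below only illustrates that rule; it is not the actual op kernel, it ignores `learning_rate` (the op signature above has no learning-rate input), and the function and argument names are hypothetical.

import numpy as np

def adadelta_step(param, grad, avg_sq_grad, avg_sq_update, rho=0.95, epsilon=1.0e-6):
    # Decaying average of squared gradients: E[g^2] <- rho * E[g^2] + (1 - rho) * g^2
    avg_sq_grad = rho * avg_sq_grad + (1 - rho) * grad ** 2
    # Update scaled by RMS of past updates over RMS of gradients
    update = -np.sqrt(avg_sq_update + epsilon) / np.sqrt(avg_sq_grad + epsilon) * grad
    # Decaying average of squared updates: E[dx^2] <- rho * E[dx^2] + (1 - rho) * dx^2
    avg_sq_update = rho * avg_sq_update + (1 - rho) * update ** 2
    # Returned values correspond to ParamOut, AvgSquaredGradOut, AvgSquaredUpdateOut
    return param + update, avg_sq_grad, avg_sq_update
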
@@ -589,3 +643,4 @@ def _append_optimize_op(self, block, param_and_grad):
Adam = AdamOptimizer
Adamax = AdamaxOptimizer
DecayedAdagrad = DecayedAdagradOptimizer
+Adadelta = AdadeltaOptimizer
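
With the short `Adadelta` alias exported, the new optimizer is used like the existing ones. A minimal usage sketch, assuming the fluid package is imported as `fluid` and `avg_cost` is a loss variable built elsewhere in the program (both names are placeholders, and the hyperparameter values are arbitrary):

optimizer = fluid.optimizer.Adadelta(learning_rate=0.01, epsilon=1.0e-6, rho=0.95)
optimizer.minimize(avg_cost)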