@@ -43,11 +43,7 @@ class Optimizer(object):
     but need to use one of its implementations.
     """
 
-    def __init__(self,
-                 learning_rate,
-                 regularization=None,
-                 LARS_weight_decay=0.0,
-                 name=None):
+    def __init__(self, learning_rate, regularization=None, name=None):
         if not isinstance(learning_rate, float) and \
                 not isinstance(learning_rate, framework.Variable):
             raise TypeError("learning rate should be float or Variable")
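The hunks below repeat the same refactor for every optimizer subclass: drop the `**kwargs` pass-through and forward `regularization` and `name` explicitly to `Optimizer.__init__`. A minimal sketch of that pattern, using a hypothetical `MyOptimizer` class (not part of this change) defined in the same module:

```python
# Sketch only: "MyOptimizer" is illustrative; Optimizer is the base class
# defined in this file, so this assumes module-level context.
class MyOptimizer(Optimizer):
    def __init__(self, learning_rate, regularization=None, name=None):
        # Forward the explicit keyword arguments instead of **kwargs.
        super(MyOptimizer, self).__init__(
            learning_rate=learning_rate,
            regularization=regularization,
            name=name)
        self.type = "my_optimizer"
```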
@@ -68,7 +64,6 @@ def __init__(self,
         # {accum_name : { parameter_name : accumulator_for_parameter, ...}, ...}
         self._accumulators = defaultdict(lambda: dict())
         self.helper = None
-        self._LARS_weight_decay = LARS_weight_decay
 
     def _create_global_learning_rate(self):
         lr = self._global_learning_rate()
@@ -227,10 +222,6 @@ def _create_optimization_pass(self,
         self._create_accumulators(loss.block,
                                   [p[0] for p in parameters_and_grads])
         self._create_global_learning_rate()
-        if self._LARS_weight_decay > 0.0:
-            layers.append_LARS(parameters_and_grads,
-                               self._global_learning_rate(),
-                               self._LARS_weight_decay)
 
         optimize_ops = []
         for param_and_grad in parameters_and_grads:
@@ -287,6 +278,9 @@ class SGDOptimizer(Optimizer):
     Args:
         learning_rate (float|Variable): the learning rate used to update parameters. \
             Can be a float value or a Variable with one float value as data element.
+        regularization: A Regularizer, such as
+            fluid.regularizer.L2DecayRegularizer.
+        name: An optional name prefix.
 
     Examples:
         .. code-block:: python
@@ -295,10 +289,12 @@ class SGDOptimizer(Optimizer):
             sgd_optimizer.minimize(cost)
     """
 
-    def __init__(self, learning_rate, **kwargs):
+    def __init__(self, learning_rate, regularization=None, name=None):
         assert learning_rate is not None
         super(SGDOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
         self.type = "sgd"
 
     def _append_optimize_op(self, block, param_and_grad):
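With the explicit signature, the newly documented arguments can be passed directly when building the optimizer. A hedged usage sketch; the `regularization_coeff` value and the `avg_cost` loss variable are assumptions for illustration, not part of this diff:

```python
import paddle.fluid as fluid

# Construct SGD with the explicit regularization/name arguments.
sgd_optimizer = fluid.optimizer.SGDOptimizer(
    learning_rate=0.01,
    regularization=fluid.regularizer.L2DecayRegularizer(
        regularization_coeff=1e-4),
    name="sgd")
# sgd_optimizer.minimize(avg_cost)  # avg_cost comes from the surrounding program
```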
@@ -343,6 +339,9 @@ class MomentumOptimizer(Optimizer):
             Can be a float value or a Variable with one float value as data element.
         momentum (float): momentum factor
         use_nesterov (bool): enables Nesterov momentum
+        regularization: A Regularizer, such as
+            fluid.regularizer.L2DecayRegularizer.
+        name: An optional name prefix.
 
     Examples:
         .. code-block:: python
@@ -352,11 +351,18 @@ class MomentumOptimizer(Optimizer):
     """
     _velocity_acc_str = "velocity"
 
-    def __init__(self, learning_rate, momentum, use_nesterov=False, **kwargs):
+    def __init__(self,
+                 learning_rate,
+                 momentum,
+                 use_nesterov=False,
+                 regularization=None,
+                 name=None):
         assert learning_rate is not None
         assert momentum is not None
         super(MomentumOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
         self.type = "momentum"
         self._momentum = momentum
         self._use_nesterov = bool(use_nesterov)
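Keyword-style call sites should be unaffected by this refactor: `regularization` and `name` were previously swallowed by `**kwargs` and forwarded to the base class, and are now named parameters; only callers passing the removed `LARS_weight_decay` would break. A sketch with illustrative values:

```python
import paddle.fluid as fluid

# Same keyword usage works before and after the change.
momentum_optimizer = fluid.optimizer.MomentumOptimizer(
    learning_rate=0.1,
    momentum=0.9,
    use_nesterov=True,
    regularization=fluid.regularizer.L2DecayRegularizer(
        regularization_coeff=1e-4),
    name="momentum")
```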
@@ -412,6 +418,9 @@ class AdagradOptimizer(Optimizer):
         learning_rate (float|Variable): the learning rate used to update parameters. \
             Can be a float value or a Variable with one float value as data element.
         epsilon (float): a small float value for numerical stability.
+        regularization: A Regularizer, such as
+            fluid.regularizer.L2DecayRegularizer.
+        name: An optional name prefix.
 
     Examples:
         .. code-block:: python
@@ -421,11 +430,17 @@ class AdagradOptimizer(Optimizer):
     """
     _moment_acc_str = "moment"
 
-    def __init__(self, learning_rate, epsilon=1.0e-6, **kwargs):
+    def __init__(self,
+                 learning_rate,
+                 epsilon=1.0e-6,
+                 regularization=None,
+                 name=None):
         assert learning_rate is not None
         assert epsilon is not None
         super(AdagradOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
         self.type = "adagrad"
         self._epsilon = epsilon
 
@@ -485,6 +500,9 @@ class AdamOptimizer(Optimizer):
         beta1 (float): The exponential decay rate for the 1st moment estimates.
         beta2 (float): The exponential decay rate for the 2nd moment estimates.
         epsilon (float): a small float value for numerical stability.
+        regularization: A Regularizer, such as
+            fluid.regularizer.L2DecayRegularizer.
+        name: An optional name prefix.
 
     Examples:
         .. code-block:: python
@@ -503,13 +521,16 @@ def __init__(self,
                  beta1=0.9,
                  beta2=0.999,
                  epsilon=1e-8,
-                 **kwargs):
+                 regularization=None,
+                 name=None):
         assert learning_rate is not None
         assert beta1 is not None
         assert beta2 is not None
         assert epsilon is not None
         super(AdamOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
         self.type = "adam"
         self._beta1 = beta1
         self._beta2 = beta2
@@ -629,6 +650,9 @@ class AdamaxOptimizer(Optimizer):
         beta1 (float): The exponential decay rate for the 1st moment estimates.
         beta2 (float): The exponential decay rate for the 2nd moment estimates.
         epsilon (float): a small float value for numerical stability.
+        regularization: A Regularizer, such as
+            fluid.regularizer.L2DecayRegularizer.
+        name: An optional name prefix.
 
     Examples:
         .. code-block:: python
@@ -645,13 +669,16 @@ def __init__(self,
                  beta1=0.9,
                  beta2=0.999,
                  epsilon=1e-8,
-                 **kwargs):
+                 regularization=None,
+                 name=None):
         assert learning_rate is not None
         assert beta1 is not None
         assert beta2 is not None
         assert epsilon is not None
         super(AdamaxOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
         self.type = "adamax"
         self._beta1 = beta1
         self._beta2 = beta2
@@ -742,6 +769,9 @@ class DecayedAdagradOptimizer(Optimizer):
             Can be a float value or a Variable with one float value as data element.
         decay (float): decay rate.
         epsilon (float): a small float value for numerical stability.
+        regularization: A Regularizer, such as
+            fluid.regularizer.L2DecayRegularizer.
+        name: An optional name prefix.
 
     Examples:
         .. code-block:: python
@@ -751,13 +781,20 @@ class DecayedAdagradOptimizer(Optimizer):
     """
     _moment_acc_str = "moment"
 
-    def __init__(self, learning_rate, decay=0.95, epsilon=1.0e-6, **kwargs):
+    def __init__(self,
+                 learning_rate,
+                 decay=0.95,
+                 epsilon=1.0e-6,
+                 regularization=None,
+                 name=None):
         assert learning_rate is not None
         assert decay is not None
         assert epsilon is not None
 
         super(DecayedAdagradOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
         self.type = "decayed_adagrad"
         self._decay = decay
         self._epsilon = epsilon
@@ -811,6 +848,9 @@ class AdadeltaOptimizer(Optimizer):
         learning_rate(float): global learning rate
         rho(float): rho in equation
         epsilon(float): epsilon in equation
+        regularization: A Regularizer, such as
+            fluid.regularizer.L2DecayRegularizer.
+        name: An optional name prefix.
 
     Examples:
         .. code-block:: python
@@ -823,15 +863,22 @@ class AdadeltaOptimizer(Optimizer):
     _avg_squared_grad_acc_str = "_avg_squared_grad"
     _avg_squared_update_acc_str = "_avg_squared_update"
 
-    def __init__(self, learning_rate, epsilon=1.0e-6, rho=0.95, **kwargs):
+    def __init__(self,
+                 learning_rate,
+                 epsilon=1.0e-6,
+                 rho=0.95,
+                 regularization=None,
+                 name=None):
         if learning_rate is None:
             raise ValueError("learning_rate is not set.")
         if epsilon is None:
             raise ValueError("epsilon is not set.")
         if rho is None:
             raise ValueError("rho is not set.")
         super(AdadeltaOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
         self.type = "adadelta"
         self._epsilon = epsilon
         self._rho = rho
@@ -932,6 +979,9 @@ class RMSPropOptimizer(Optimizer):
             the gradient; if False, by the uncentered second moment. Setting this to
             True may help with training, but is slightly more expensive in terms of
             computation and memory. Defaults to False.
+        regularization: A Regularizer, such as
+            fluid.regularizer.L2DecayRegularizer.
+        name: An optional name prefix.
 
     Raises:
         ValueError: If learning_rate, rho, epsilon, momentum are None.
@@ -953,9 +1003,12 @@ def __init__(self,
                  epsilon=1.0e-6,
                  momentum=0.0,
                  centered=False,
-                 **kwargs):
+                 regularization=None,
+                 name=None):
         super(RMSPropOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
         if learning_rate is None:
             raise ValueError("learning_rate is not set.")
         if rho is None:
@@ -1061,6 +1114,9 @@ class FtrlOptimizer(Optimizer):
         l1 (float):
         l2 (float):
         lr_power (float):
+        regularization: A Regularizer, such as
+            fluid.regularizer.L2DecayRegularizer.
+        name: An optional name prefix.
 
     Raises:
         ValueError: If learning_rate, rho, epsilon, momentum are None.
@@ -1075,9 +1131,17 @@ class FtrlOptimizer(Optimizer):
     _squared_acc_str = "squared"
     _linear_acc_str = "linear"
 
-    def __init__(self, learning_rate, l1=0.0, l2=0.0, lr_power=-0.5, **kwargs):
+    def __init__(self,
+                 learning_rate,
+                 l1=0.0,
+                 l2=0.0,
+                 lr_power=-0.5,
+                 regularization=None,
+                 name=None):
         super(FtrlOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
         if learning_rate is None:
             raise ValueError("learning_rate is not set.")
 
@@ -1155,7 +1219,9 @@ class ModelAverage(Optimizer):
         average_window_rate: The rate of average window.
         min_average_window: The minimum size of average window.
         max_average_window: The maximum size of average window.
-
+        regularization: A Regularizer, such as
+            fluid.regularizer.L2DecayRegularizer.
+        name: An optional name prefix.
     Examples:
 
         .. code-block:: python
@@ -1178,8 +1244,10 @@ def __init__(self,
                  average_window_rate,
                  min_average_window=10000,
                  max_average_window=10000,
-                 **kwargs):
-        super(ModelAverage, self).__init__(0.0, **kwargs)
+                 regularization=None,
+                 name=None):
+        super(ModelAverage, self).__init__(
+            0.0, regularization=regularization, name=name)
         self.average_window = average_window_rate
         self.min_average_window = min_average_window
         self.max_average_window = max_average_window
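ModelAverage now forwards the same two arguments to the base constructor alongside its fixed 0.0 learning rate. A construction sketch using the parameters named in this diff; the numeric values are arbitrary placeholders:

```python
import paddle.fluid as fluid

# Window sizes are illustrative; regularization/name map straight onto the
# updated signature above.
model_average = fluid.optimizer.ModelAverage(
    average_window_rate=0.15,
    min_average_window=10000,
    max_average_window=20000,
    regularization=None,
    name="model_average")
```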