
Commit 2030958

convert **kwargs to explicit arguments
Also deprecate the LARS argument
1 parent 8a8c572 commit 2030958

File tree

2 files changed: +104 -39 lines changed
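
In user code, the practical effect is that optimizer options are now spelled out as explicit keyword arguments instead of being forwarded through **kwargs. A minimal sketch of the new call style, assuming avg_cost is a loss variable built elsewhere in the program and using the fluid.optimizer.SGD alias:

    import paddle.fluid as fluid

    # Options are passed explicitly; unknown keyword arguments are no longer accepted.
    sgd_optimizer = fluid.optimizer.SGD(
        learning_rate=0.001,
        regularization=fluid.regularizer.L2DecayRegularizer(
            regularization_coeff=1e-4),
        name="sgd")
    sgd_optimizer.minimize(avg_cost)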


python/paddle/fluid/optimizer.py

Lines changed: 99 additions & 31 deletions
@@ -43,11 +43,7 @@ class Optimizer(object):
     but need to use one of it's implementation.
     """
 
-    def __init__(self,
-                 learning_rate,
-                 regularization=None,
-                 LARS_weight_decay=0.0,
-                 name=None):
+    def __init__(self, learning_rate, regularization=None, name=None):
         if not isinstance(learning_rate, float) and \
                 not isinstance(learning_rate, framework.Variable):
             raise TypeError("learning rate should be float or Variable")
@@ -68,7 +64,6 @@ def __init__(self,
         # {accum_name : { paramter_name : accumulator_for_parameter, ...}, ...}
         self._accumulators = defaultdict(lambda: dict())
         self.helper = None
-        self._LARS_weight_decay = LARS_weight_decay
 
     def _create_global_learning_rate(self):
         lr = self._global_learning_rate()
@@ -227,10 +222,6 @@ def _create_optimization_pass(self,
         self._create_accumulators(loss.block,
                                   [p[0] for p in parameters_and_grads])
         self._create_global_learning_rate()
-        if self._LARS_weight_decay > 0.0:
-            layers.append_LARS(parameters_and_grads,
-                               self._global_learning_rate(),
-                               self._LARS_weight_decay)
 
         optimize_ops = []
         for param_and_grad in parameters_and_grads:
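
Because the base Optimizer.__init__ no longer accepts **kwargs, a third-party subclass that relied on keyword forwarding would now pass the supported options explicitly. A hedged sketch following the same pattern as the built-in optimizers updated below (MyOptimizer and its type string are hypothetical):

    from paddle.fluid.optimizer import Optimizer

    class MyOptimizer(Optimizer):
        def __init__(self, learning_rate, regularization=None, name=None):
            # Forward the explicit arguments; **kwargs forwarding is gone.
            super(MyOptimizer, self).__init__(
                learning_rate=learning_rate,
                regularization=regularization,
                name=name)
            self.type = "my_optimizer"  # hypothetical operator type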
@@ -287,6 +278,9 @@ class SGDOptimizer(Optimizer):
     Args:
         learning_rate (float|Variable): the learning rate used to update parameters. \
         Can be a float value or a Variable with one float value as data element.
+        regularization: A Regularizer, such as
+            fluid.regularizer.L2DecayRegularizer.
+        name: A optional name prefix.
 
     Examples:
         .. code-block:: python
@@ -295,10 +289,12 @@ class SGDOptimizer(Optimizer):
             sgd_optimizer.minimize(cost)
     """
 
-    def __init__(self, learning_rate, **kwargs):
+    def __init__(self, learning_rate, regularization=None, name=None):
         assert learning_rate is not None
         super(SGDOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
         self.type = "sgd"
 
     def _append_optimize_op(self, block, param_and_grad):
@@ -343,6 +339,9 @@ class MomentumOptimizer(Optimizer):
         Can be a float value or a Variable with one float value as data element.
         momentum (float): momentum factor
         use_nesterov (bool): enables Nesterov momentum
+        regularization: A Regularizer, such as
+            fluid.regularizer.L2DecayRegularizer.
+        name: A optional name prefix.
 
     Examples:
         .. code-block:: python
@@ -352,11 +351,18 @@ class MomentumOptimizer(Optimizer):
     """
     _velocity_acc_str = "velocity"
 
-    def __init__(self, learning_rate, momentum, use_nesterov=False, **kwargs):
+    def __init__(self,
+                 learning_rate,
+                 momentum,
+                 use_nesterov=False,
+                 regularization=None,
+                 name=None):
         assert learning_rate is not None
         assert momentum is not None
         super(MomentumOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
         self.type = "momentum"
         self._momentum = momentum
         self._use_nesterov = bool(use_nesterov)
@@ -412,6 +418,9 @@ class AdagradOptimizer(Optimizer):
         learning_rate (float|Variable): the learning rate used to update parameters. \
         Can be a float value or a Variable with one float value as data element.
         epsilon (float): a small float value for numerical stability.
+        regularization: A Regularizer, such as
+            fluid.regularizer.L2DecayRegularizer.
+        name: A optional name prefix.
 
     Examples:
         .. code-block:: python
@@ -421,11 +430,17 @@ class AdagradOptimizer(Optimizer):
     """
     _moment_acc_str = "moment"
 
-    def __init__(self, learning_rate, epsilon=1.0e-6, **kwargs):
+    def __init__(self,
+                 learning_rate,
+                 epsilon=1.0e-6,
+                 regularization=None,
+                 name=None):
         assert learning_rate is not None
         assert epsilon is not None
         super(AdagradOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
         self.type = "adagrad"
         self._epsilon = epsilon
 
@@ -485,6 +500,9 @@ class AdamOptimizer(Optimizer):
         beta1 (float): The exponential decay rate for the 1st moment estimates.
         beta2 (float): The exponential decay rate for the 2nd moment estimates.
         epsilon (float): a small float value for numerical stability.
+        regularization: A Regularizer, such as
+            fluid.regularizer.L2DecayRegularizer.
+        name: A optional name prefix.
 
     Examples:
         .. code-block:: python
@@ -503,13 +521,16 @@ def __init__(self,
                  beta1=0.9,
                  beta2=0.999,
                  epsilon=1e-8,
-                 **kwargs):
+                 regularization=None,
+                 name=None):
         assert learning_rate is not None
         assert beta1 is not None
         assert beta2 is not None
         assert epsilon is not None
         super(AdamOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
         self.type = "adam"
         self._beta1 = beta1
         self._beta2 = beta2
@@ -629,6 +650,9 @@ class AdamaxOptimizer(Optimizer):
         beta1 (float): The exponential decay rate for the 1st moment estimates.
         beta2 (float): The exponential decay rate for the 2nd moment estimates.
         epsilon (float): a small float value for numerical stability.
+        regularization: A Regularizer, such as
+            fluid.regularizer.L2DecayRegularizer.
+        name: A optional name prefix.
 
     Examples:
         .. code-block:: python
@@ -645,13 +669,16 @@ def __init__(self,
                  beta1=0.9,
                  beta2=0.999,
                  epsilon=1e-8,
-                 **kwargs):
+                 regularization=None,
+                 name=None):
         assert learning_rate is not None
         assert beta1 is not None
         assert beta2 is not None
         assert epsilon is not None
         super(AdamaxOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
         self.type = "adamax"
         self._beta1 = beta1
         self._beta2 = beta2
@@ -742,6 +769,9 @@ class DecayedAdagradOptimizer(Optimizer):
         Can be a float value or a Variable with one float value as data element.
         decay (float): decay rate.
         epsilon (float): a small float value for numerical stability.
+        regularization: A Regularizer, such as
+            fluid.regularizer.L2DecayRegularizer.
+        name: A optional name prefix.
 
     Examples:
         .. code-block:: python
@@ -751,13 +781,20 @@ class DecayedAdagradOptimizer(Optimizer):
     """
     _moment_acc_str = "moment"
 
-    def __init__(self, learning_rate, decay=0.95, epsilon=1.0e-6, **kwargs):
+    def __init__(self,
+                 learning_rate,
+                 decay=0.95,
+                 epsilon=1.0e-6,
+                 regularization=None,
+                 name=None):
         assert learning_rate is not None
         assert decay is not None
         assert epsilon is not None
 
         super(DecayedAdagradOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
         self.type = "decayed_adagrad"
         self._decay = decay
         self._epsilon = epsilon
@@ -811,6 +848,9 @@ class AdadeltaOptimizer(Optimizer):
         learning_rate(float): global learning rate
         rho(float): rho in equation
         epsilon(float): epsilon in equation
+        regularization: A Regularizer, such as
+            fluid.regularizer.L2DecayRegularizer.
+        name: A optional name prefix.
 
     Examples:
         .. code-block:: python
@@ -823,15 +863,22 @@ class AdadeltaOptimizer(Optimizer):
     _avg_squared_grad_acc_str = "_avg_squared_grad"
     _avg_squared_update_acc_str = "_avg_squared_update"
 
-    def __init__(self, learning_rate, epsilon=1.0e-6, rho=0.95, **kwargs):
+    def __init__(self,
+                 learning_rate,
+                 epsilon=1.0e-6,
+                 rho=0.95,
+                 regularization=None,
+                 name=None):
         if learning_rate is None:
             raise ValueError("learning_rate is not set.")
         if epsilon is None:
             raise ValueError("epsilon is not set.")
         if rho is None:
             raise ValueError("rho is not set.")
         super(AdadeltaOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
         self.type = "adadelta"
         self._epsilon = epsilon
         self._rho = rho
@@ -932,6 +979,9 @@ class RMSPropOptimizer(Optimizer):
         the gradient; if False, by the uncentered second moment. Setting this to
         True may help with training, but is slightly more expensive in terms of
         computation and memory. Defaults to False.
+        regularization: A Regularizer, such as
+            fluid.regularizer.L2DecayRegularizer.
+        name: A optional name prefix.
 
     Raises:
         ValueError: If learning_rate, rho, epsilon, momentum are None.
@@ -953,9 +1003,12 @@ def __init__(self,
                  epsilon=1.0e-6,
                  momentum=0.0,
                  centered=False,
-                 **kwargs):
+                 regularization=None,
+                 name=None):
         super(RMSPropOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
         if learning_rate is None:
             raise ValueError("learning_rate is not set.")
         if rho is None:
@@ -1061,6 +1114,9 @@ class FtrlOptimizer(Optimizer):
         l1 (float):
         l2 (float):
         lr_power (float):
+        regularization: A Regularizer, such as
+            fluid.regularizer.L2DecayRegularizer.
+        name: A optional name prefix.
 
     Raises:
         ValueError: If learning_rate, rho, epsilon, momentum are None.
@@ -1075,9 +1131,17 @@ class FtrlOptimizer(Optimizer):
     _squared_acc_str = "squared"
     _linear_acc_str = "linear"
 
-    def __init__(self, learning_rate, l1=0.0, l2=0.0, lr_power=-0.5, **kwargs):
+    def __init__(self,
+                 learning_rate,
+                 l1=0.0,
+                 l2=0.0,
+                 lr_power=-0.5,
+                 regularization=None,
+                 name=None):
         super(FtrlOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
         if learning_rate is None:
             raise ValueError("learning_rate is not set.")
 
@@ -1155,7 +1219,9 @@ class ModelAverage(Optimizer):
         average_window_rate: The rate of average window.
         min_average_window: The minimum size of average window.
         max_average_window: The maximum size of average window.
-
+        regularization: A Regularizer, such as
+            fluid.regularizer.L2DecayRegularizer.
+        name: A optional name prefix.
     Examples:
 
         .. code-block:: python
@@ -1178,8 +1244,10 @@ def __init__(self,
                  average_window_rate,
                  min_average_window=10000,
                  max_average_window=10000,
-                 **kwargs):
-        super(ModelAverage, self).__init__(0.0, **kwargs)
+                 regularization=None,
+                 name=None):
+        super(ModelAverage, self).__init__(
+            0.0, regularization=regularization, name=name)
         self.average_window = average_window_rate
         self.min_average_window = min_average_window
         self.max_average_window = max_average_window
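
The same explicit-argument pattern applies to optimizers with several hyperparameters. A sketch using the fluid.optimizer.Momentum alias, with avg_cost again assumed to be the user's loss variable:

    import paddle.fluid as fluid

    optimizer = fluid.optimizer.Momentum(
        learning_rate=0.01,
        momentum=0.9,
        use_nesterov=True,
        regularization=fluid.regularizer.L2DecayRegularizer(
            regularization_coeff=1e-4),
        name="momentum")
    optimizer.minimize(avg_cost)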

python/paddle/fluid/regularizer.py

Lines changed: 5 additions & 8 deletions
@@ -190,14 +190,11 @@ class L1DecayRegularizer(WeightDecayRegularizer):
     Examples:
         .. code-block:: python
 
-            program = fluid.framework.Program()
-            block = program.global_block()
-            mul_x = block.create_parameter(
-                dtype="float32",
-                shape=[5, 10],
-                lod_level=0,
-                name="mul.x",
-                regularizer=fluid.regularizer.L1DecayRegularizer(0.5))
+            optimizer = fluid.optimizer.Adagrad(
+                learning_rate=1e-4,
+                regularization=fluid.regularizer.L1DecayRegularizer(
+                    regularization_coeff=0.1))
+            optimizer.minimize(avg_cost)
     """
 
     def __init__(self, regularization_coeff=0.0):
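
The updated docstring example attaches the regularizer through the optimizer's regularization argument; a comparable sketch with the L2 variant (avg_cost is assumed to be defined by the surrounding program):

    import paddle.fluid as fluid

    optimizer = fluid.optimizer.Adagrad(
        learning_rate=1e-4,
        regularization=fluid.regularizer.L2DecayRegularizer(
            regularization_coeff=0.1))
    optimizer.minimize(avg_cost)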
