Commit 4ec9eca

Merge pull request #11547 from jacquesqiao/support-ftrl-optimizer
add ftrl optimizer
2 parents c22ebb3 + 6caea45 commit 4ec9eca

File tree

2 files changed: +178 -4 lines changed


python/paddle/fluid/optimizer.py

Lines changed: 112 additions & 4 deletions
@@ -26,10 +26,10 @@
 from contextlib import contextmanager
 
 __all__ = [
-    'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad',
+    'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', 'Ftrl',
     'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer',
     'AdamaxOptimizer', 'DecayedAdagradOptimizer', 'RMSPropOptimizer',
-    'Adadelta', 'ModelAverage', 'Optimizer'
+    'FtrlOptimizer', 'Adadelta', 'ModelAverage', 'Optimizer'
 ]
 
 
@@ -628,7 +628,7 @@ class AdadeltaOptimizer(Optimizer):
         E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\\_rate)^2
 
     Args:
-        learning_rate(float): global leraning rate
+        learning_rate(float): global learning rate
         rho(float): rho in equation
         epsilon(float): epsilon in equation
@@ -729,7 +729,7 @@ class RMSPropOptimizer(Optimizer):
 
 
     Args:
-        learning_rate(float): global leraning rate.
+        learning_rate(float): global learning rate.
         rho(float): rho is :math: `\\rho` in equation, set 0.95 by default.
         epsilon(float): :math: `\\epsilon` in equation is smoothing term to
             avoid division by zero, set 1e-6 by default.
@@ -810,6 +810,113 @@ def _append_optimize_op(self, block, param_and_grad):
         return rmsprop_op
 
 
+class FtrlOptimizer(Optimizer):
+    """
+    FTRL (Follow The Regularized Leader) Optimizer.
+
+    The paper that proposed Follow The Regularized Leader (FTRL):
+    (https://www.eecs.tufts.edu/~dsculley/papers/ad-click-prediction.pdf)
+
+    .. math::
+
+        &new\_accum = squared\_accum + grad^2
+
+        &if (lr\_power == -0.5):
+
+        &\quad linear\_accum += grad - \\frac{\\sqrt{new\_accum} - \\sqrt{squared\_accum}}{learning\_rate} * param
+
+        &else:
+
+        &\quad linear\_accum += grad - \\frac{new\_accum^{-lr\_power} - accum^{-lr\_power}}{learning\_rate} * param
+
+        &x = l1 * sign(linear\_accum) - linear\_accum
+
+        &if (lr\_power == -0.5):
+
+        &\quad y = \\frac{\\sqrt{new\_accum}}{learning\_rate} + (2 * l2)
+
+        &\quad pre\_shrink = \\frac{x}{y}
+
+        &\quad param = (abs(linear\_accum) > l1).select(pre\_shrink, 0.0)
+
+        &else:
+
+        &\quad y = \\frac{new\_accum^{-lr\_power}}{learning\_rate} + (2 * l2)
+
+        &\quad pre\_shrink = \\frac{x}{y}
+
+        &\quad param = (abs(linear\_accum) > l1).select(pre\_shrink, 0.0)
+
+        &squared\_accum += grad^2
+
+    Args:
+        learning_rate (float|Variable): global learning rate.
+        l1 (float): L1 regularization strength, 0.0 by default.
+        l2 (float): L2 regularization strength, 0.0 by default.
+        lr_power (float): learning rate power, -0.5 by default.
+
+    Raises:
+        ValueError: If learning_rate is None.
+
+    Examples:
+        .. code-block:: python
+
+              optimizer = fluid.optimizer.Ftrl(0.0001)
+              _, params_grads = optimizer.minimize(cost)
+    """
+
+    _squared_acc_str = "squared"
+    _linear_acc_str = "linear"
+
+    def __init__(self, learning_rate, l1=0.0, l2=0.0, lr_power=-0.5, **kwargs):
+        super(FtrlOptimizer, self).__init__(
+            learning_rate=learning_rate, **kwargs)
+        if learning_rate is None:
+            raise ValueError("learning_rate is not set.")
+
+        self.type = "ftrl"
+        self._l1 = l1
+        self._l2 = l2
+        self._lr_power = lr_power
+
+    def _create_accumulators(self, block, parameters):
+        if not isinstance(block, framework.Block):
+            raise TypeError("block is not instance of framework.Block.")
+
+        for p in parameters:
+            self._add_accumulator(self._squared_acc_str, p)
+            self._add_accumulator(self._linear_acc_str, p)
+
+    def _append_optimize_op(self, block, param_and_grad):
+        if not isinstance(block, framework.Block):
+            raise TypeError("block is not instance of framework.Block.")
+
+        squared_acc = self._get_accumulator(self._squared_acc_str,
+                                            param_and_grad[0])
+        linear_acc = self._get_accumulator(self._linear_acc_str,
+                                           param_and_grad[0])
+        ftrl_op = block.append_op(
+            type=self.type,
+            inputs={
+                "Param": param_and_grad[0],
+                "Grad": param_and_grad[1],
+                "SquaredAccumulator": squared_acc,
+                "LinearAccumulator": linear_acc,
+                "LearningRate": self._create_param_lr(param_and_grad),
+            },
+            outputs={
+                "ParamOut": param_and_grad[0],
+                "SquaredAccumOut": squared_acc,
+                "LinearAccumOut": linear_acc
+            },
+            attrs={"l1": self._l1,
+                   "l2": self._l2,
+                   "lr_power": self._lr_power})
+
+        return ftrl_op
+
+
 # We short the class name, since users will use the optimizer with the package
 # name. The sample code:
 #
@@ -826,6 +933,7 @@ def _append_optimize_op(self, block, param_and_grad):
 DecayedAdagrad = DecayedAdagradOptimizer
 Adadelta = AdadeltaOptimizer
 RMSProp = RMSPropOptimizer
+Ftrl = FtrlOptimizer
 
 
 class ModelAverage(Optimizer):
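
For orientation, the new `FtrlOptimizer` only wires the parameter, gradient, learning rate, and the two accumulators into a single `ftrl` op; the update arithmetic runs inside that op. A minimal NumPy sketch of the per-parameter update described in the docstring, for the default `lr_power == -0.5` case and following the FTRL-Proximal rule of the referenced McMahan et al. paper, might look as follows; the function name `ftrl_step` and its plain-array interface are illustrative only, not part of this change.

```python
import numpy as np

def ftrl_step(param, grad, squared_accum, linear_accum,
              learning_rate, l1=0.0, l2=0.0):
    """One FTRL-Proximal update for a single parameter array (lr_power == -0.5)."""
    new_accum = squared_accum + grad ** 2
    # sigma = (sqrt(n + g^2) - sqrt(n)) / learning_rate; the linear term z
    # accumulates the gradient minus the sigma-scaled current weights.
    sigma = (np.sqrt(new_accum) - np.sqrt(squared_accum)) / learning_rate
    linear_accum = linear_accum + grad - sigma * param
    # Closed-form proximal step: soft-threshold z by l1, then rescale by y.
    x = l1 * np.sign(linear_accum) - linear_accum
    y = np.sqrt(new_accum) / learning_rate + 2.0 * l2
    pre_shrink = x / y
    param = np.where(np.abs(linear_accum) > l1, pre_shrink, 0.0)
    return param, new_accum, linear_accum

# Toy usage: one scalar weight, a constant gradient, a few steps.
w, n, z = np.zeros(1), np.zeros(1), np.zeros(1)
for _ in range(3):
    w, n, z = ftrl_step(w, np.array([0.5]), n, z,
                        learning_rate=0.1, l1=0.01, l2=0.0)
```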

python/paddle/fluid/tests/unittests/test_optimizer.py

Lines changed: 66 additions & 0 deletions
@@ -434,5 +434,71 @@ def test_decayed_adagrad_optimizer(self):
         self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)
 
 
+class TestFtrlOptimizer(unittest.TestCase):
+    class MockFtrl(optimizer.FtrlOptimizer):
+        def get_accumulators(self):
+            return self._accumulators
+
+        def get_squared_str(self):
+            return self._squared_acc_str
+
+        def get_linear_str(self):
+            return self._linear_acc_str
+
+    def test_ftrl_optimizer(self):
+        init_program = framework.Program()
+        program = framework.Program()
+        block = program.global_block()
+        mul_x = block.create_parameter(
+            dtype="float32",
+            shape=[5, 10],
+            lod_level=0,
+            name="mul.x",
+            optimize_attr={'learning_rate': 1.1})
+        mul_y = block.create_var(
+            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
+        mul_out = block.create_var(
+            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
+        block.append_op(
+            type="mul",
+            inputs={"X": mul_x,
+                    "Y": mul_y},
+            outputs={"Out": mul_out},
+            attrs={"x_num_col_dims": 1})
+        mean_out = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="mean.out")
+        block.append_op(
+            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
+        learning_rate = 0.01
+        ftrl_optimizer = self.MockFtrl(
+            learning_rate=learning_rate, l1=0.0, l2=0.0, lr_power=-0.5)
+        params_grads = append_backward(mean_out)
+        self.assertEqual(len(params_grads), 1)
+        self.assertEqual(len(ftrl_optimizer.get_accumulators()), 0)
+        opts = ftrl_optimizer.create_optimization_pass(params_grads, mul_out,
+                                                       init_program)
+        self.assertEqual(len(opts), 3)
+        self.assertEqual([op.type for op in opts],
+                         ["fill_constant", "elementwise_mul", "ftrl"])
+
+        # Check accumulators
+        accumulators = ftrl_optimizer.get_accumulators()
+        self.assertEqual(len(accumulators), 2)
+        self.assertTrue(ftrl_optimizer.get_squared_str() in accumulators)
+        self.assertTrue(ftrl_optimizer.get_linear_str() in accumulators)
+        squared_acc = accumulators[ftrl_optimizer.get_squared_str()]
+        linear_acc = accumulators[ftrl_optimizer.get_linear_str()]
+        self.assertEqual(len(squared_acc), 1)
+        self.assertEqual(len(linear_acc), 1)
+        self.assertTrue(mul_x.name in squared_acc)
+        self.assertTrue(mul_x.name in linear_acc)
+
+        # Check init_program
+        init_ops = init_program.global_block().ops
+        self.assertEqual(len(init_ops), 3)
+        self.assertEqual(init_ops[0].type, "fill_constant")
+        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
+
+
 if __name__ == '__main__':
     unittest.main()
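
Beyond the unit test, a hedged end-to-end sketch of how the new `Ftrl` alias could be used in a Fluid program, modeled on the docstring example; the surrounding network (the `fluid.layers.data`/`fc`/`cross_entropy`/`mean` calls and their parameters) is illustrative and assumes the Fluid layers API available around this release.

```python
import paddle.fluid as fluid

# Tiny classifier whose loss the FTRL optimizer will minimize.
image = fluid.layers.data(name='image', shape=[784], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
prediction = fluid.layers.fc(input=image, size=10, act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(cost)

# The short alias added by this commit; l1, l2, and lr_power keep their defaults.
optimizer = fluid.optimizer.Ftrl(learning_rate=0.0001)
_, params_grads = optimizer.minimize(avg_cost)
```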
