Commit 6bf75c1

cherry-pick,fix optimizer.state_dict and LRScheduler.state_dict to save/load dygraph (#25447)
1 parent 316afbb commit 6bf75c1

File tree

5 files changed: +267 -85 lines

python/paddle/fluid/dygraph/checkpoint.py

Lines changed: 1 addition & 1 deletion
@@ -78,9 +78,9 @@ def save_dygraph(state_dict, model_path):
     for k, v in state_dict.items():
         if isinstance(v, (Variable, core.VarBase)):
             model_dict[k] = v.numpy()
+            name_table[k] = v.name
         else:
             model_dict[k] = v
-            name_table[k] = v.name
     model_dict["StructuredToParameterName@@"] = name_table

     file_name = model_path + suffix
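
The fix moves the `name_table` update into the `isinstance` branch, so `save_dygraph` records a structured-to-parameter name only for real `Variable`/`VarBase` entries, while plain Python values (such as the scalar counters an LR scheduler now contributes) are stored as-is instead of failing on a missing `.name`. A toy sketch of the corrected loop outside Paddle, with a stand-in class in place of `core.VarBase`:

import numpy as np

class FakeVar:
    # stand-in for Variable / core.VarBase: has .name and .numpy()
    def __init__(self, name, value):
        self.name, self._value = name, value
    def numpy(self):
        return np.asarray(self._value)

state_dict = {"fc.weight": FakeVar("linear_0.w_0", [1.0, 2.0]),
              "epoch_num": 3}          # plain value, e.g. a scheduler counter

model_dict, name_table = {}, {}
for k, v in state_dict.items():
    if isinstance(v, FakeVar):          # `(Variable, core.VarBase)` in the real code
        model_dict[k] = v.numpy()
        name_table[k] = v.name          # names recorded only for real variables
    else:
        model_dict[k] = v               # plain values no longer touch `v.name`
model_dict["StructuredToParameterName@@"] = name_table
# name_table == {"fc.weight": "linear_0.w_0"}; "epoch_num" is kept as a plain int.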

python/paddle/fluid/dygraph/learning_rate_scheduler.py

Lines changed: 76 additions & 15 deletions
@@ -15,6 +15,7 @@
 from __future__ import print_function

 import math
+import warnings

 from .. import unique_name
 from ..framework import Variable
@@ -66,6 +67,51 @@ def create_lr_var(self, lr):
             persistable=False)
         return lr

+    def state_dict(self):
+        """
+        Returns the state of the scheduler as a :class:`dict`.
+
+        It is a subset of self.__dict__ .
+        """
+        self._state_keys()
+        state_dict = {}
+        for key in self.keys:
+            if key not in self.__dict__:
+                continue
+            value = self.__dict__[key]
+            if isinstance(value, Variable):
+                assert value.shape == [
+                    1
+                ], "shape of Variable in state_dict must be [1] {}".format(
+                    value.shape)
+                value = value.numpy()[0]
+            state_dict[key] = value
+
+        return state_dict
+
+    def _state_keys(self):
+        """
+        set the keys in self.__dict__ that are needed to be saved.
+        """
+        self.keys = ['step_num']
+
+    def set_dict(self, state_dict):
+        """
+        Loads the schedulers state.
+        """
+        self._state_keys()
+        for key in self.keys:
+            if key in state_dict:
+                self.__dict__[key] = state_dict[key]
+            else:
+                raise RuntimeError(
+                    "Please check whether state_dict is correct for optimizer. Can't find [ {} ] in state_dict".
+                    format(key))
+        if len(state_dict) > len(self.keys):
+            warnings.warn(
+                "There are some unused values in state_dict. Maybe the optimizer have different 'LearningRateDecay' when invoking state_dict and set_dict"
+            )
+
     def step(self):
         raise NotImplementedError()

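The new `state_dict()`/`set_dict()` pair makes any `LearningRateDecay` subclass checkpointable by value: `_state_keys()` lists the attributes to keep (just `step_num` by default), `Variable` values are unwrapped to scalars on save, and missing or extra keys raise or warn on load. A minimal round-trip sketch, assuming the Paddle 1.8-era `fluid.dygraph` API; the `step_num` poke is only there to simulate a scheduler that has already run:

import paddle.fluid as fluid

with fluid.dygraph.guard():
    # step-based scheduler; its default state dict keeps only 'step_num'
    scheduler = fluid.dygraph.ExponentialDecay(
        learning_rate=0.1, decay_steps=1000, decay_rate=0.9)
    scheduler.step_num = 50                 # pretend 50 optimizer steps have run

    state = scheduler.state_dict()          # {'step_num': 50}

    resumed = fluid.dygraph.ExponentialDecay(
        learning_rate=0.1, decay_steps=1000, decay_rate=0.9)
    resumed.set_dict(state)                 # continues from step 50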

@@ -402,7 +448,7 @@ class PolynomialDecay(LearningRateDecay):
         learning_rate(Variable|float): The initial learning rate. If the type
             is Variable, it's a tensor with shape [1], the data type can be
             float32 or float64. It also can be set to python int number.
-        decay_steps(int32): The decay step size. It determines the decay cycle.
+        decay_steps(int): The decay step size. It determines the decay cycle.
         end_learning_rate(float, optional): The minimum final learning rate. The default value is 0.0001.
         power(float, optional): Power of polynomial. The default value is 1.0.
         cycle(bool, optional): If set true, decay the learning rate every decay_steps. The default value is False.
@@ -784,7 +830,7 @@ def __init__(self,
             raise ValueError(
                 'new_lr = origin_lr * decay_rate and decay_rate should be < 1.0.'
             )
-        self.decay_rate = decay_rate
+        self.decay_rate = self.create_lr_var(decay_rate)

         threshold_mode = threshold_mode.lower()
         if threshold_mode not in ['rel', 'abs']:
@@ -793,8 +839,10 @@
         self.threshold_mode = threshold_mode
         check_type(learning_rate, 'learning_rate', (float, int, Variable),
                    'ReduceLROnPlateau')
-        if isinstance(learning_rate, (float, int)):
-            learning_rate = self.create_lr_var(learning_rate)
+        if not isinstance(learning_rate, (float, int, Variable)):
+            raise TypeError(
+                "The type of 'learning_rate' in 'ReduceLROnPlateau' must be 'float, int, Variable', but received %s."
+                % type(learning_rate))

         self.learning_rate = learning_rate
         self.verbose = verbose
@@ -808,9 +856,17 @@ def __init__(self,
         self.cooldown_counter = 0
         self.best_loss = None
         self.num_bad_epochs = 0
-        self.epoch = 0
+        self.epoch_num = 0
+
+    def _state_keys(self):
+        self.keys = [
+            'cooldown_counter', 'best_loss', 'num_bad_epochs', 'epoch_num',
+            'learning_rate'
+        ]

     def __call__(self):
+        if not isinstance(self.learning_rate, Variable):
+            self.learning_rate = self.create_lr_var(self.learning_rate)
         return self.learning_rate

     def step(self, loss):
@@ -836,7 +892,7 @@ def step(self, loss):
             "should be (1L,), but the current loss.shape is {}. Maybe that " \
             "you should call fluid.layers.mean to process it first.".format(loss.shape)

-        self.epoch += 1
+        self.epoch_num += 1
         if self.cooldown_counter > 0:
             self.cooldown_counter -= 1
         else:
@@ -854,10 +910,11 @@
                                                 self.decay_rate, self.min_lr)
                 if self.learning_rate - new_lr > self.eps:
                     if self.verbose:
+                        old_lr = self.learning_rate.numpy()[0] if isinstance(
+                            self.learning_rate,
+                            Variable) else self.learning_rate
                         print('Epoch {}: reducing learning rate from {} to {}.'.
-                              format(self.epoch,
-                                     self.learning_rate.numpy()[0],
-                                     new_lr.numpy()[0]))
+                              format(self.epoch_num, old_lr, new_lr.numpy()[0]))
                     self.learning_rate = new_lr

     def _is_better(self, current, best):
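
For `ReduceLROnPlateau`, `_state_keys()` widens the saved state to the plateau-tracking counters plus the current learning rate, and the renamed `epoch_num` together with the `old_lr` guard keeps `verbose` logging working whether the rate is still a Python float or already a `Variable`. A small sketch of inspecting that state, again assuming the 1.8-era `fluid.dygraph` API:

import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    scheduler = fluid.dygraph.ReduceLROnPlateau(
        learning_rate=0.1, decay_rate=0.5, patience=2, verbose=True)

    for _ in range(3):                       # feed a constant (non-improving) loss
        loss = fluid.dygraph.to_variable(np.array([1.0], dtype='float32'))
        scheduler.step(loss)

    state = scheduler.state_dict()
    print(sorted(state.keys()))
    # ['best_loss', 'cooldown_counter', 'epoch_num', 'learning_rate', 'num_bad_epochs']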
@@ -890,22 +947,28 @@ def __init__(self, learning_rate, dtype=None):
             raise TypeError(
                 "The type of 'learning_rate' must be 'float, int', but received %s."
                 % type(learning_rate))
-        if learning_rate >= 1.0:
-            raise ValueError("The initial learning rate")
+        if learning_rate < 0:
+            raise ValueError("Invalid learning rate: {}".format(learning_rate))

         self.base_lr = float(learning_rate)

         self.epoch_num = -1
+        self.dtype = dtype
         if dtype is None:
             self.dtype = "float32"
         self.learning_rate = self.create_lr_var(self.base_lr)

         self.epoch()

+    def _state_keys(self):
+        self.keys = ['epoch_num', 'learning_rate']
+
     def __call__(self):
         """
         Return last computed learning rate on current epoch.
         """
+        if not isinstance(self.learning_rate, Variable):
+            self.learning_rate = self.create_lr_var(self.learning_rate)
         return self.learning_rate

     def epoch(self, epoch=None):
@@ -918,8 +981,6 @@ def epoch(self, epoch=None):
             self.epoch_num = epoch

         self.learning_rate = self.get_lr()
-        if isinstance(self.learning_rate, float):
-            self.learning_rate = self.create_lr_var(self.learning_rate)

     def get_lr(self):
         raise NotImplementedError
@@ -946,7 +1007,7 @@ class StepDecay(_LearningRateEpochDecay):

     Parameters:
         learning_rate (float|int): The initial learning rate. It can be set to python float or int number.
-        step_size (int): Period of learning rate decay..
+        step_size (int): Period of learning rate decay.
         decay_rate (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * decay_rate`` .
             It should be less than 1.0. Default: 0.1.

@@ -1024,7 +1085,7 @@ class MultiStepDecay(_LearningRateEpochDecay):
         learning_rate = 0.005

     Parameters:
-        learning_rate (float|int): The initial learning rate. It can be set to python float or int number. If it
+        learning_rate (float|int): The initial learning rate. It can be set to python float or int number.
         milestones (tuple|list): List or tuple of each boundaries. Must be increasing.
         decay_rate (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * decay_rate`` .
             It should be less than 1.0. Default: 0.1.
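
The `_LearningRateEpochDecay` family (`StepDecay`, `MultiStepDecay`, ...) now keeps `learning_rate` as a plain float until it is actually consumed and saves only `['epoch_num', 'learning_rate']`; `epoch()` advances the counter and recomputes the rate. A resume sketch under the same 1.8-era API assumptions:

import paddle.fluid as fluid

with fluid.dygraph.guard():
    scheduler = fluid.dygraph.StepDecay(
        learning_rate=0.5, step_size=3, decay_rate=0.1)

    for _ in range(7):
        scheduler.epoch()                # lr: 0.5 for epochs 0-2, 0.05 for 3-5, ...

    state = scheduler.state_dict()       # {'epoch_num': ..., 'learning_rate': ...}

    resumed = fluid.dygraph.StepDecay(
        learning_rate=0.5, step_size=3, decay_rate=0.1)
    resumed.set_dict(state)              # picks up the same epoch count and rate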

python/paddle/fluid/optimizer.py

Lines changed: 34 additions & 33 deletions
@@ -33,7 +33,7 @@
 from .regularizer import append_regularization_ops
 from .dygraph import base as imperative_base
 from .dygraph import no_grad
-from .dygraph.learning_rate_scheduler import LearningRateDecay
+from .dygraph.learning_rate_scheduler import LearningRateDecay, _LearningRateEpochDecay
 from paddle.fluid import core
 from paddle.fluid.layers import tensor
 from functools import reduce
@@ -148,17 +148,17 @@ def state_dict(self):
                 state_dict[var_tmp.name] = var_tmp
         # global step if use lr decay
         if isinstance(self._learning_rate, LearningRateDecay):
-            var_tmp = None
-            if framework.in_dygraph_mode():
+            state_dict["LR_Scheduler"] = self._learning_rate.state_dict()
+
+            if not isinstance(self._learning_rate, _LearningRateEpochDecay):
+                var_tmp = None
                 var_temp = framework._varbase_creator(
                     None, name='global_step', dtype='int32')
-            else:
-                var_temp = Variable(None, name='global_step', dtype='int32')

-            tensor.fill_constant(
-                [1], "int32", self._learning_rate.step_num, out=var_temp)
+                tensor.fill_constant(
+                    [1], "int32", self._learning_rate.step_num, out=var_temp)

-            state_dict['global_step'] = var_temp
+                state_dict['global_step'] = var_temp
         return state_dict

     @framework.dygraph_only
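
With this change `Optimizer.state_dict()` always nests the scheduler state under an "LR_Scheduler" key, and only the step-based `LearningRateDecay` schedulers still emit a separate `global_step` variable; epoch-based schedulers carry `epoch_num` inside "LR_Scheduler" instead. A sketch of inspecting the result, assuming the 1.8-era dygraph optimizers:

import paddle.fluid as fluid

with fluid.dygraph.guard():
    linear = fluid.dygraph.Linear(10, 10)
    scheduler = fluid.dygraph.StepDecay(
        learning_rate=0.5, step_size=3, decay_rate=0.1)
    adam = fluid.optimizer.Adam(
        learning_rate=scheduler, parameter_list=linear.parameters())

    state = adam.state_dict()
    print(state["LR_Scheduler"])     # e.g. {'epoch_num': 0, 'learning_rate': 0.5}
    print("global_step" in state)    # False: epoch-based scheduler, no global step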
@@ -192,30 +192,28 @@ def set_dict(self, state_dict):
         '''

         if isinstance(self._learning_rate, LearningRateDecay):
-            assert 'global_step' in state_dict, \
-                    'Global step not in state dict, Dygraph use LearningRateDecay, global_step must in state_dict'
-            global_step = state_dict['global_step']
-
-            if isinstance(global_step, core.VarBase):
-                step_np = global_step
-                step_np = np.array(step_np.value().get_tensor())
-                assert step_np.shape == (1,), \
-                        "global step shape is (1,), the shape is {}".format( step_np.shape )
-
-                self._learning_rate.step_num = int(step_np[0])
-            elif isinstance(global_step, Variable):
-                step_np = global_step.numpy()
-                assert step_np.shape == (1,), \
-                    "global step shape is (1,), the shape is {}".format( step_np.shape )
-                self._learning_rate.step_num = step_np[0]
-            elif isinstance(global_step, np.ndarray):
-                assert global_step.shape == (1,), \
-                    "global step shape is (1,), the shape is {}".format( global_step.shape )
-                self._learning_rate.step_num = global_step[0]
-            else:
-                raise RuntimeError(
-                    "Type not supprt, value in state dict must be [VarBase, Variable, numpy], the type is ",
-                    type(global_step))
+            self._learning_rate.set_dict(state_dict["LR_Scheduler"])
+
+            if not isinstance(self._learning_rate, _LearningRateEpochDecay):
+                assert 'global_step' in state_dict, \
+                        'Global step not in state dict, Dygraph use LearningRateDecay, global_step must in state_dict'
+                global_step = state_dict['global_step']
+
+                if isinstance(global_step, Variable):
+                    step_np = global_step
+                    step_np = np.array(step_np.value().get_tensor())
+                    assert step_np.shape == (1,), \
+                            "global step shape is (1,), the shape is {}".format( step_np.shape )
+
+                    self._learning_rate.step_num = int(step_np[0])
+                elif isinstance(global_step, np.ndarray):
+                    assert global_step.shape == (1,), \
+                        "global step shape is (1,), the shape is {}".format( global_step.shape )
+                    self._learning_rate.step_num = global_step[0]
+                else:
+                    raise RuntimeError(
+                        "Type not supprt, value in state dict must be [VarBase, Variable, numpy], the type is ",
+                        type(global_step))

         self._accumulators_holder = state_dict
         for k, v in self._accumulators.items():
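
`set_dict()` is the mirror image: it first restores the scheduler from `state_dict["LR_Scheduler"]` and only then, for step-based schedulers, reads `global_step` back (as a `Variable`/`VarBase` or the numpy array that `save_dygraph` produced). A save/restore round-trip sketch, still assuming the 1.8-era `fluid.dygraph` API and an illustrative "paddle_dy" file prefix:

import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    linear = fluid.dygraph.Linear(10, 1)
    scheduler = fluid.dygraph.ExponentialDecay(
        learning_rate=0.1, decay_steps=100, decay_rate=0.9)
    sgd = fluid.optimizer.SGD(
        learning_rate=scheduler, parameter_list=linear.parameters())

    x = fluid.dygraph.to_variable(np.random.rand(4, 10).astype('float32'))
    loss = fluid.layers.reduce_mean(linear(x))
    loss.backward()
    sgd.minimize(loss)

    # optimizer state now nests the scheduler under "LR_Scheduler"
    fluid.dygraph.save_dygraph(linear.state_dict(), "paddle_dy")
    fluid.dygraph.save_dygraph(sgd.state_dict(), "paddle_dy")

    para_state, opti_state = fluid.dygraph.load_dygraph("paddle_dy")
    linear.set_dict(para_state)
    sgd.set_dict(opti_state)         # restores step_num via "global_step"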
@@ -346,11 +344,14 @@ def current_step_lr(self):

         """
         current_lr = self._global_learning_rate()
-        if current_lr:
+        if isinstance(current_lr, framework.Variable):
             return self._global_learning_rate().numpy()[0]

         if isinstance(self._learning_rate, float):
             return self._learning_rate
+        elif isinstance(self._learning_rate, _LearningRateEpochDecay):
+            step_lr = self._learning_rate()
+            return step_lr.numpy()[0]
         else:
             step_lr = self._learning_rate.step()
             if isinstance(step_lr, (float, int)):
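
`current_step_lr()` now reports the rate an epoch-based scheduler would hand out by calling it (which lazily materializes the stored float into a `Variable`) rather than stepping it. A sketch under the same API assumptions:

import paddle.fluid as fluid

with fluid.dygraph.guard():
    linear = fluid.dygraph.Linear(10, 10)
    scheduler = fluid.dygraph.MultiStepDecay(
        learning_rate=0.2, milestones=[2, 4], decay_rate=0.1)
    adam = fluid.optimizer.Adam(
        learning_rate=scheduler, parameter_list=linear.parameters())

    print(adam.current_step_lr())    # 0.2 before the first milestone
    scheduler.epoch(3)               # jump past the first milestone
    print(adam.current_step_lr())    # 0.02 after one decay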
