@@ -15,6 +15,7 @@
 from __future__ import print_function

 import math
+import warnings

 from .. import unique_name
 from ..framework import Variable
@@ -66,6 +67,51 @@ def create_lr_var(self, lr):
             persistable=False)
         return lr

+    def state_dict(self):
+        """
+        Returns the state of the scheduler as a :class:`dict`.
+
+        It is a subset of self.__dict__.
+        """
+        self._state_keys()
+        state_dict = {}
+        for key in self.keys:
+            if key not in self.__dict__:
+                continue
+            value = self.__dict__[key]
+            if isinstance(value, Variable):
+                assert value.shape == [
+                    1
+                ], "shape of Variable in state_dict must be [1], but received {}".format(
+                    value.shape)
+                value = value.numpy()[0]
+            state_dict[key] = value
+
+        return state_dict
+
+    def _state_keys(self):
+        """
+        Set the keys in self.__dict__ that need to be saved.
+        """
+        self.keys = ['step_num']
+
+    def set_dict(self, state_dict):
+        """
+        Loads the scheduler's state.
+        """
+        self._state_keys()
+        for key in self.keys:
+            if key in state_dict:
+                self.__dict__[key] = state_dict[key]
+            else:
+                raise RuntimeError(
+                    "Please check whether state_dict is correct for optimizer. Can't find [ {} ] in state_dict".
+                    format(key))
+        if len(state_dict) > len(self.keys):
+            warnings.warn(
+                "There are some unused values in state_dict. Maybe the optimizer has a different 'LearningRateDecay' when invoking state_dict and set_dict"
+            )
+
     def step(self):
         raise NotImplementedError()

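A minimal usage sketch of the new state_dict()/set_dict() pair, assuming the PaddlePaddle 1.x fluid dygraph APIs (the ExponentialDecay arguments below are illustrative only, not part of this change):

import paddle.fluid as fluid

with fluid.dygraph.guard():
    # Step-based schedulers only persist 'step_num' (see _state_keys above).
    scheduler = fluid.dygraph.ExponentialDecay(
        learning_rate=0.1, decay_steps=1000, decay_rate=0.9)

    # ... training advances scheduler.step_num through the optimizer ...
    state = scheduler.state_dict()  # plain dict; shape-[1] Variables are unwrapped to scalars

    # Rebuild an identical scheduler later and restore its progress.
    restored = fluid.dygraph.ExponentialDecay(
        learning_rate=0.1, decay_steps=1000, decay_rate=0.9)
    restored.set_dict(state)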
@@ -402,7 +448,7 @@ class PolynomialDecay(LearningRateDecay):
         learning_rate(Variable|float): The initial learning rate. If the type
             is Variable, it's a tensor with shape [1], the data type can be
             float32 or float64. It also can be set to python int number.
-        decay_steps(int32): The decay step size. It determines the decay cycle.
+        decay_steps(int): The decay step size. It determines the decay cycle.
         end_learning_rate(float, optional): The minimum final learning rate. The default value is 0.0001.
         power(float, optional): Power of polynomial. The default value is 1.0.
         cycle(bool, optional): If set true, decay the learning rate every decay_steps. The default value is False.
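As a hedged illustration of the decay_steps parameter documented above, assuming the fluid.dygraph.PolynomialDecay constructor from PaddlePaddle 1.x (the concrete numbers are made up):

import paddle.fluid as fluid

with fluid.dygraph.guard():
    # Decays from 0.1 towards end_learning_rate over decay_steps scheduler steps.
    lr = fluid.dygraph.PolynomialDecay(
        learning_rate=0.1,
        decay_steps=5000,
        end_learning_rate=0.0001,
        power=1.0,
        cycle=False)
    print(lr().numpy())  # current learning rate as a shape-[1] Variable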
@@ -784,7 +830,7 @@ def __init__(self,
             raise ValueError(
                 'new_lr = origin_lr * decay_rate and decay_rate should be < 1.0.'
             )
-        self.decay_rate = decay_rate
+        self.decay_rate = self.create_lr_var(decay_rate)

         threshold_mode = threshold_mode.lower()
         if threshold_mode not in ['rel', 'abs']:
@@ -793,8 +839,10 @@ def __init__(self,
         self.threshold_mode = threshold_mode
         check_type(learning_rate, 'learning_rate', (float, int, Variable),
                    'ReduceLROnPlateau')
-        if isinstance(learning_rate, (float, int)):
-            learning_rate = self.create_lr_var(learning_rate)
+        if not isinstance(learning_rate, (float, int, Variable)):
+            raise TypeError(
+                "The type of 'learning_rate' in 'ReduceLROnPlateau' must be 'float, int, Variable', but received %s."
+                % type(learning_rate))

         self.learning_rate = learning_rate
         self.verbose = verbose
@@ -808,9 +856,17 @@ def __init__(self,
         self.cooldown_counter = 0
         self.best_loss = None
         self.num_bad_epochs = 0
-        self.epoch = 0
+        self.epoch_num = 0
+
+    def _state_keys(self):
+        self.keys = [
+            'cooldown_counter', 'best_loss', 'num_bad_epochs', 'epoch_num',
+            'learning_rate'
+        ]

     def __call__(self):
+        if not isinstance(self.learning_rate, Variable):
+            self.learning_rate = self.create_lr_var(self.learning_rate)
         return self.learning_rate

     def step(self, loss):
@@ -836,7 +892,7 @@ def step(self, loss):
             "should be (1L,), but the current loss.shape is {}. Maybe that " \
             "you should call fluid.layers.mean to process it first.".format(loss.shape)

-        self.epoch += 1
+        self.epoch_num += 1
         if self.cooldown_counter > 0:
             self.cooldown_counter -= 1
         else:
@@ -854,10 +910,11 @@ def step(self, loss):
                     self.decay_rate, self.min_lr)
                 if self.learning_rate - new_lr > self.eps:
                     if self.verbose:
+                        old_lr = self.learning_rate.numpy()[0] if isinstance(
+                            self.learning_rate,
+                            Variable) else self.learning_rate
                         print('Epoch {}: reducing learning rate from {} to {}.'.
-                              format(self.epoch,
-                                     self.learning_rate.numpy()[0],
-                                     new_lr.numpy()[0]))
+                              format(self.epoch_num, old_lr, new_lr.numpy()[0]))
                     self.learning_rate = new_lr

     def _is_better(self, current, best):
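A short sketch of how the renamed epoch_num counter and the extended scheduler state interact, assuming fluid.dygraph.ReduceLROnPlateau from PaddlePaddle 1.x and a dummy loss tensor (not canonical training code):

import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    scheduler = fluid.dygraph.ReduceLROnPlateau(
        learning_rate=0.1, decay_rate=0.5, patience=2, verbose=True)

    # Feed a (constant, fake) per-epoch loss; each call bumps scheduler.epoch_num.
    for epoch in range(5):
        loss = fluid.dygraph.to_variable(np.array([1.0], dtype='float32'))
        scheduler.step(loss)

    # 'learning_rate' is now part of the saved state as well (see _state_keys above).
    print(scheduler.state_dict())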
def _is_better (self , current , best ):
@@ -890,22 +947,28 @@ def __init__(self, learning_rate, dtype=None):
890
947
raise TypeError (
891
948
"The type of 'learning_rate' must be 'float, int', but received %s."
892
949
% type (learning_rate ))
893
- if learning_rate >= 1. 0 :
894
- raise ValueError ("The initial learning rate" )
950
+ if learning_rate < 0 :
951
+ raise ValueError ("Invalid learning rate: {}" . format ( learning_rate ) )
895
952
896
953
self .base_lr = float (learning_rate )
897
954
898
955
self .epoch_num = - 1
956
+ self .dtype = dtype
899
957
if dtype is None :
900
958
self .dtype = "float32"
901
959
self .learning_rate = self .create_lr_var (self .base_lr )
902
960
903
961
self .epoch ()
904
962
963
+ def _state_keys (self ):
964
+ self .keys = ['epoch_num' , 'learning_rate' ]
965
+
905
966
def __call__ (self ):
906
967
"""
907
968
Return last computed learning rate on current epoch.
908
969
"""
970
+ if not isinstance (self .learning_rate , Variable ):
971
+ self .learning_rate = self .create_lr_var (self .learning_rate )
909
972
return self .learning_rate
910
973
911
974
def epoch (self , epoch = None ):
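Two small consequences of the changes above, sketched with fluid.dygraph.StepDecay (an _LearningRateEpochDecay subclass), assuming PaddlePaddle 1.x APIs:

import paddle.fluid as fluid

with fluid.dygraph.guard():
    # Negative initial rates are now rejected up front.
    try:
        fluid.dygraph.StepDecay(learning_rate=-0.1, step_size=10)
    except ValueError as err:
        print(err)  # Invalid learning rate: -0.1

    # Epoch-based schedulers persist both keys registered in _state_keys.
    lr = fluid.dygraph.StepDecay(learning_rate=0.5, step_size=3)
    print(sorted(lr.state_dict().keys()))  # ['epoch_num', 'learning_rate']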
@@ -918,8 +981,6 @@ def epoch(self, epoch=None):
             self.epoch_num = epoch

         self.learning_rate = self.get_lr()
-        if isinstance(self.learning_rate, float):
-            self.learning_rate = self.create_lr_var(self.learning_rate)

     def get_lr(self):
         raise NotImplementedError
@@ -946,7 +1007,7 @@ class StepDecay(_LearningRateEpochDecay):

     Parameters:
         learning_rate (float|int): The initial learning rate. It can be set to python float or int number.
-        step_size (int): Period of learning rate decay..
+        step_size (int): Period of learning rate decay.
         decay_rate (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * decay_rate`` .
             It should be less than 1.0. Default: 0.1.

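A quick sketch of the step_size period described above, assuming fluid.dygraph.StepDecay from PaddlePaddle 1.x (epoch counts and rates are illustrative):

import paddle.fluid as fluid

with fluid.dygraph.guard():
    lr = fluid.dygraph.StepDecay(learning_rate=0.5, step_size=3, decay_rate=0.1)
    for epoch in range(9):
        # ... run one training epoch ...
        lr.epoch()  # lr is 0.5 for epochs 0-2, 0.05 for 3-5, 0.005 for 6-8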
@@ -1024,7 +1085,7 @@ class MultiStepDecay(_LearningRateEpochDecay):
            learning_rate = 0.005

     Parameters:
-        learning_rate (float|int): The initial learning rate. It can be set to python float or int number. If it
+        learning_rate (float|int): The initial learning rate. It can be set to python float or int number.
         milestones (tuple|list): List or tuple of each boundaries. Must be increasing.
         decay_rate (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * decay_rate`` .
             It should be less than 1.0. Default: 0.1.
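Similarly, a hedged sketch of the milestones behaviour, assuming fluid.dygraph.MultiStepDecay from PaddlePaddle 1.x:

import paddle.fluid as fluid

with fluid.dygraph.guard():
    lr = fluid.dygraph.MultiStepDecay(
        learning_rate=0.5, milestones=[2, 4], decay_rate=0.1)
    for epoch in range(6):
        # ... run one training epoch ...
        lr.epoch()  # lr is 0.5 before epoch 2, 0.05 from epoch 2, 0.005 from epoch 4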