@@ -36,10 +36,15 @@ class Optimizer(object):
     """

     def __init__(self, learning_rate, global_step=None, regularization=None):
-        assert learning_rate is not None
+        if not isinstance(learning_rate, float) and \
+                not isinstance(learning_rate, framework.Variable):
+            raise ValueError("learning rate should be float or Variable")
         self._global_step = global_step
         self.regularization = regularization
-        self._global_learning_rate = learning_rate
+        self._learning_rate = learning_rate
+        # each program should have an independent learning rate
+        # program -> Variable(learning_rate)
+        self._learning_rate_map = defaultdict(lambda: None)
         # Dictionary of accumulators. Some optimizer subclasses need to
         # allocate and manage extra variables associated with the parameters
         # to train. These variables are called accumulators.
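
The constructor hunk above replaces a bare assert with an explicit type check and swaps the single `_global_learning_rate` attribute for a per-program map. A minimal standalone sketch of that constructor contract follows; the class and attribute names are illustrative, not Paddle's API, and only the float case is modeled:

    from collections import defaultdict


    class LearningRateHolder(object):
        """Sketch: validate the input, keep the raw value, and defer
        per-program creation to a lazily filled map."""

        def __init__(self, learning_rate):
            # the real code also accepts framework.Variable; this sketch
            # only models the float case
            if not isinstance(learning_rate, float):
                raise ValueError("learning rate should be float or Variable")
            self._learning_rate = learning_rate
            # each program gets an independent learning rate:
            # program -> learning-rate value, missing programs map to None
            self._learning_rate_map = defaultdict(lambda: None)


    holder = LearningRateHolder(0.01)
    assert holder._learning_rate_map["some_program"] is None
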
@@ -48,26 +53,33 @@ def __init__(self, learning_rate, global_step=None, regularization=None):
         self.helper = None

     def _create_global_learning_rate(self):
-        if isinstance(self._global_learning_rate, float):
-            self._global_learning_rate = layers.create_global_var(
-                name=unique_name.generate("learning_rate"),
-                shape=[1],
-                value=float(self._global_learning_rate),
-                dtype='float32',
-                persistable=True)
-
-        if not isinstance(self._global_learning_rate, framework.Variable):
-            raise ValueError("learning rate should be a Variable, "
-                             "actual type is %s",
-                             type(self._global_learning_rate))
-
-    @property
-    def global_learning_rate(self):
+        lr = self.global_learning_rate()
+
+        if isinstance(lr, framework.Variable):
+            return
+        else:
+            if not isinstance(self._learning_rate, float):
+                raise ValueError(
+                    "learning rate variable is created outside optimizer, "
+                    "cannot create new learning rate variable for new program")
+
+            # create learning rate in the current main program
+            self._learning_rate_map[framework.default_main_program(
+            )] = layers.create_global_var(
+                name=unique_name.generate("learning_rate"),
+                shape=[1],
+                value=float(self._learning_rate),
+                dtype='float32',
+                persistable=True)
+
+    def global_learning_rate(self, program=None):
         """
         get global decayed learning rate
         :return:
         """
-        return self._global_learning_rate
+        if program is None:
+            program = framework.default_main_program()
+        return self._learning_rate_map[program]

     def _append_optimize_op(self, block, param_and_grad):
         """ append optimize operator to block and return all the added optimize_op
@@ -78,7 +90,7 @@ def _create_param_lr(self, param_and_grad):
         # create learning rate variable for every parameter
         param = param_and_grad[0]
         param_lr = param.optimize_attr['learning_rate']
-        return self._global_learning_rate * param_lr
+        return self.global_learning_rate() * param_lr

     def _create_accumulators(self, block, parameters):
         """Create all accumulators needed by the parameters