@@ -35,15 +35,21 @@ def _append_optimize_op(self, block, param_and_grad):
35
35
"""
36
36
raise NotImplementedError ()
37
37
38
- def _initialize_tensors (self , block ):
39
- """Create all necessary tensors, that will be shared for all parameter updates.
40
-
41
- Tensors like learning rate should be initialized here.
42
-
43
- Args:
44
- block: the block in which the loss variable is present
45
- """
46
- pass
38
def _create_param_lr(self, param_and_grad):
    """Create the learning-rate variable used for a single parameter.

    The parameter's ``optimize_attr['learning_rate']`` entry is multiplied
    by the optimizer's ``_learning_rate``; the product becomes the constant
    initial value of a freshly created global variable.

    Args:
        param_and_grad: indexable pair whose element 0 is the parameter.
            Element 1 is not read here.

    Returns:
        The global variable created through ``self.helper``.
    """
    parameter = param_and_grad[0]
    lr_multiplier = parameter.optimize_attr['learning_rate']

    # A new uniquely named variable is created on every call, so each
    # parameter gets its own learning-rate variable.
    lr_variable = self.helper.create_global_variable(
        name=unique_name("learning_rate"),
        dtype='float32',
        shape=[1],
        lod_level=1,
        persistable=True)

    # Combine the per-parameter multiplier with the optimizer-wide rate.
    effective_lr = lr_multiplier * self._learning_rate
    self.helper.set_variable_initializer(
        var=lr_variable,
        initializer=ConstantInitializer(effective_lr))
    return lr_variable
47
53
48
54
def _create_accumulators (self , block , parameters ):
49
55
"""Create all accumulators needed by the parameters
@@ -161,8 +167,6 @@ def create_optimization_pass(self,
161
167
startup_program = startup_program )
162
168
self ._create_accumulators (loss .block ,
163
169
[p [0 ] for p in parameters_and_grads ])
164
- # Create any necessary tensors
165
- self ._initialize_tensors (loss .block )
166
170
167
171
optimize_ops = []
168
172
for param_and_grad in parameters_and_grads :
@@ -214,27 +218,16 @@ def __init__(self, learning_rate, global_step=None):
214
218
self .type = "sgd"
215
219
self ._learning_rate = learning_rate
216
220
217
- def _initialize_tensors (self , block ):
218
- lr_shape = [1 ]
219
- # create a variable for learning_rate
220
- self ._lr = self .helper .create_global_variable (
221
- name = unique_name ("learning_rate" ),
222
- dtype = 'float32' ,
223
- shape = lr_shape ,
224
- lod_level = 1 ,
225
- persistable = True )
226
- self .helper .set_variable_initializer (
227
- var = self ._lr , initializer = ConstantInitializer (self ._learning_rate ))
228
-
229
221
def _append_optimize_op (self , block , param_and_grad ):
230
222
assert isinstance (block , framework .Block )
223
+
231
224
# create the optimize op
232
225
sgd_op = block .append_op (
233
226
type = self .type ,
234
227
inputs = {
235
228
"Param" : param_and_grad [0 ],
236
229
"Grad" : param_and_grad [1 ],
237
- "LearningRate" : self ._lr
230
+ "LearningRate" : self ._create_param_lr ( param_and_grad )
238
231
},
239
232
outputs = {"ParamOut" : param_and_grad [0 ]})
240
233
@@ -259,19 +252,6 @@ def __init__(self,
259
252
self ._momentum = momentum
260
253
self ._use_nesterov = bool (use_nesterov )
261
254
262
- def _initialize_tensors (self , block ):
263
- assert isinstance (block , framework .Block )
264
- lr_shape = [1 ]
265
- # create a variable for learning_rate
266
- self ._lr = self .helper .create_global_variable (
267
- name = unique_name ("learning_rate" ),
268
- dtype = 'float32' ,
269
- shape = lr_shape ,
270
- lod_level = 1 ,
271
- persistable = True )
272
- self .helper .set_variable_initializer (
273
- var = self ._lr , initializer = ConstantInitializer (self ._learning_rate ))
274
-
275
255
def _create_accumulators (self , block , parameters ):
276
256
assert isinstance (block , framework .Block )
277
257
@@ -290,7 +270,7 @@ def _append_optimize_op(self, block, param_and_grad):
290
270
"Param" : param_and_grad [0 ],
291
271
"Grad" : param_and_grad [1 ],
292
272
"Velocity" : velocity_acc ,
293
- "LearningRate" : self ._lr
273
+ "LearningRate" : self ._create_param_lr ( param_and_grad )
294
274
},
295
275
outputs = {
296
276
"ParamOut" : param_and_grad [0 ],
@@ -315,18 +295,6 @@ def __init__(self, learning_rate, epsilon=1.0e-6, global_step=None):
315
295
self ._learning_rate = learning_rate
316
296
self ._epsilon = epsilon
317
297
318
- def _initialize_tensors (self , block ):
319
- lr_shape = [1 ]
320
- # create a variable for learning_rate
321
- self ._lr = self .helper .create_global_variable (
322
- name = unique_name ("learning_rate" ),
323
- dtype = 'float32' ,
324
- shape = lr_shape ,
325
- lod_level = 1 ,
326
- persistable = True )
327
- self .helper .set_variable_initializer (
328
- var = self ._lr , initializer = ConstantInitializer (self ._learning_rate ))
329
-
330
298
def _create_accumulators (self , block , parameters ):
331
299
assert isinstance (block , framework .Block )
332
300
@@ -346,7 +314,7 @@ def _append_optimize_op(self, block, param_and_grad):
346
314
"Param" : param_and_grad [0 ],
347
315
"Grad" : param_and_grad [1 ],
348
316
"Moment" : moment_acc ,
349
- "LearningRate" : self ._lr
317
+ "LearningRate" : self ._create_param_lr ( param_and_grad )
350
318
},
351
319
outputs = {"ParamOut" : param_and_grad [0 ],
352
320
"MomentOut" : moment_acc },
@@ -378,18 +346,6 @@ def __init__(self,
378
346
self ._beta2 = beta2
379
347
self ._epsilon = epsilon
380
348
381
- def _initialize_tensors (self , block ):
382
- lr_shape = [1 ]
383
- # create a variable for learning_rate
384
- self ._lr = self .helper .create_global_variable (
385
- name = unique_name ("learning_rate" ),
386
- dtype = 'float32' ,
387
- shape = lr_shape ,
388
- lod_level = 1 ,
389
- persistable = True )
390
- self .helper .set_variable_initializer (
391
- var = self ._lr , initializer = ConstantInitializer (self ._learning_rate ))
392
-
393
349
def _create_accumulators (self , block , parameters ):
394
350
assert isinstance (block , framework .Block )
395
351
@@ -433,7 +389,7 @@ def _append_optimize_op(self, block, param_and_grad):
433
389
inputs = {
434
390
"Param" : param_and_grad [0 ],
435
391
"Grad" : param_and_grad [1 ],
436
- "LearningRate" : self ._lr ,
392
+ "LearningRate" : self ._create_param_lr ( param_and_grad ) ,
437
393
"Moment1" : moment1 ,
438
394
"Moment2" : moment2 ,
439
395
"Beta1Pow" : self ._beta1_pow_acc ,
@@ -495,18 +451,6 @@ def __init__(self,
495
451
self ._beta2 = beta2
496
452
self ._epsilon = epsilon
497
453
498
- def _initialize_tensors (self , block ):
499
- lr_shape = [1 ]
500
- # create a variable for learning_rate
501
- self ._lr = self .helper .create_global_variable (
502
- name = unique_name ("learning_rate" ),
503
- dtype = 'float32' ,
504
- shape = lr_shape ,
505
- lod_level = 1 ,
506
- persistable = True )
507
- self .helper .set_variable_initializer (
508
- var = self ._lr , initializer = ConstantInitializer (self ._learning_rate ))
509
-
510
454
def _create_accumulators (self , block , parameters ):
511
455
# Create beta1 power accumulator tensor
512
456
beta_shape = [1 ]
@@ -536,7 +480,7 @@ def _append_optimize_op(self, block, param_and_grad):
536
480
inputs = {
537
481
"Param" : param_and_grad [0 ],
538
482
"Grad" : param_and_grad [1 ],
539
- "LearningRate" : self ._lr ,
483
+ "LearningRate" : self ._create_param_lr ( param_and_grad ) ,
540
484
"Moment" : moment ,
541
485
"InfNorm" : inf_norm ,
542
486
"Beta1Pow" : self ._beta1_pow_acc
0 commit comments