Skip to content

Commit 9c25218

Browse files
authored
create learning rate variable for every parameter (#5524)
* create a learning rate variable for every parameter * fix CI * set parameter lr relative to the global lr
1 parent 1c31bb9 commit 9c25218

File tree

1 file changed

+21
-77
lines changed

1 file changed

+21
-77
lines changed

python/paddle/v2/framework/optimizer.py

Lines changed: 21 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -35,15 +35,21 @@ def _append_optimize_op(self, block, param_and_grad):
3535
"""
3636
raise NotImplementedError()
3737

38-
def _initialize_tensors(self, block):
39-
"""Create all necessary tensors, that will be shared for all parameter updates.
40-
41-
Tensors like learning rate should be initialized here.
42-
43-
Args:
44-
block: the block in which the loss variable is present
45-
"""
46-
pass
38+
def _create_param_lr(self, param_and_grad):
39+
# create learning rate variable for every parameter
40+
param = param_and_grad[0]
41+
param_lr = param.optimize_attr['learning_rate']
42+
param_lr_shape = [1]
43+
param_lr_var = self.helper.create_global_variable(
44+
name=unique_name("learning_rate"),
45+
dtype='float32',
46+
shape=param_lr_shape,
47+
lod_level=1,
48+
persistable=True)
49+
param_lr = param_lr * self._learning_rate
50+
self.helper.set_variable_initializer(
51+
var=param_lr_var, initializer=ConstantInitializer(param_lr))
52+
return param_lr_var
4753

4854
def _create_accumulators(self, block, parameters):
4955
"""Create all accumulators needed by the parameters
@@ -161,8 +167,6 @@ def create_optimization_pass(self,
161167
startup_program=startup_program)
162168
self._create_accumulators(loss.block,
163169
[p[0] for p in parameters_and_grads])
164-
# Create any necessary tensors
165-
self._initialize_tensors(loss.block)
166170

167171
optimize_ops = []
168172
for param_and_grad in parameters_and_grads:
@@ -214,27 +218,16 @@ def __init__(self, learning_rate, global_step=None):
214218
self.type = "sgd"
215219
self._learning_rate = learning_rate
216220

217-
def _initialize_tensors(self, block):
218-
lr_shape = [1]
219-
# create a variable for learning_rate
220-
self._lr = self.helper.create_global_variable(
221-
name=unique_name("learning_rate"),
222-
dtype='float32',
223-
shape=lr_shape,
224-
lod_level=1,
225-
persistable=True)
226-
self.helper.set_variable_initializer(
227-
var=self._lr, initializer=ConstantInitializer(self._learning_rate))
228-
229221
def _append_optimize_op(self, block, param_and_grad):
230222
assert isinstance(block, framework.Block)
223+
231224
# create the optimize op
232225
sgd_op = block.append_op(
233226
type=self.type,
234227
inputs={
235228
"Param": param_and_grad[0],
236229
"Grad": param_and_grad[1],
237-
"LearningRate": self._lr
230+
"LearningRate": self._create_param_lr(param_and_grad)
238231
},
239232
outputs={"ParamOut": param_and_grad[0]})
240233

@@ -259,19 +252,6 @@ def __init__(self,
259252
self._momentum = momentum
260253
self._use_nesterov = bool(use_nesterov)
261254

262-
def _initialize_tensors(self, block):
263-
assert isinstance(block, framework.Block)
264-
lr_shape = [1]
265-
# create a variable for learning_rate
266-
self._lr = self.helper.create_global_variable(
267-
name=unique_name("learning_rate"),
268-
dtype='float32',
269-
shape=lr_shape,
270-
lod_level=1,
271-
persistable=True)
272-
self.helper.set_variable_initializer(
273-
var=self._lr, initializer=ConstantInitializer(self._learning_rate))
274-
275255
def _create_accumulators(self, block, parameters):
276256
assert isinstance(block, framework.Block)
277257

@@ -290,7 +270,7 @@ def _append_optimize_op(self, block, param_and_grad):
290270
"Param": param_and_grad[0],
291271
"Grad": param_and_grad[1],
292272
"Velocity": velocity_acc,
293-
"LearningRate": self._lr
273+
"LearningRate": self._create_param_lr(param_and_grad)
294274
},
295275
outputs={
296276
"ParamOut": param_and_grad[0],
@@ -315,18 +295,6 @@ def __init__(self, learning_rate, epsilon=1.0e-6, global_step=None):
315295
self._learning_rate = learning_rate
316296
self._epsilon = epsilon
317297

318-
def _initialize_tensors(self, block):
319-
lr_shape = [1]
320-
# create a variable for learning_rate
321-
self._lr = self.helper.create_global_variable(
322-
name=unique_name("learning_rate"),
323-
dtype='float32',
324-
shape=lr_shape,
325-
lod_level=1,
326-
persistable=True)
327-
self.helper.set_variable_initializer(
328-
var=self._lr, initializer=ConstantInitializer(self._learning_rate))
329-
330298
def _create_accumulators(self, block, parameters):
331299
assert isinstance(block, framework.Block)
332300

@@ -346,7 +314,7 @@ def _append_optimize_op(self, block, param_and_grad):
346314
"Param": param_and_grad[0],
347315
"Grad": param_and_grad[1],
348316
"Moment": moment_acc,
349-
"LearningRate": self._lr
317+
"LearningRate": self._create_param_lr(param_and_grad)
350318
},
351319
outputs={"ParamOut": param_and_grad[0],
352320
"MomentOut": moment_acc},
@@ -378,18 +346,6 @@ def __init__(self,
378346
self._beta2 = beta2
379347
self._epsilon = epsilon
380348

381-
def _initialize_tensors(self, block):
382-
lr_shape = [1]
383-
# create a variable for learning_rate
384-
self._lr = self.helper.create_global_variable(
385-
name=unique_name("learning_rate"),
386-
dtype='float32',
387-
shape=lr_shape,
388-
lod_level=1,
389-
persistable=True)
390-
self.helper.set_variable_initializer(
391-
var=self._lr, initializer=ConstantInitializer(self._learning_rate))
392-
393349
def _create_accumulators(self, block, parameters):
394350
assert isinstance(block, framework.Block)
395351

@@ -433,7 +389,7 @@ def _append_optimize_op(self, block, param_and_grad):
433389
inputs={
434390
"Param": param_and_grad[0],
435391
"Grad": param_and_grad[1],
436-
"LearningRate": self._lr,
392+
"LearningRate": self._create_param_lr(param_and_grad),
437393
"Moment1": moment1,
438394
"Moment2": moment2,
439395
"Beta1Pow": self._beta1_pow_acc,
@@ -495,18 +451,6 @@ def __init__(self,
495451
self._beta2 = beta2
496452
self._epsilon = epsilon
497453

498-
def _initialize_tensors(self, block):
499-
lr_shape = [1]
500-
# create a variable for learning_rate
501-
self._lr = self.helper.create_global_variable(
502-
name=unique_name("learning_rate"),
503-
dtype='float32',
504-
shape=lr_shape,
505-
lod_level=1,
506-
persistable=True)
507-
self.helper.set_variable_initializer(
508-
var=self._lr, initializer=ConstantInitializer(self._learning_rate))
509-
510454
def _create_accumulators(self, block, parameters):
511455
# Create beta1 power accumulator tensor
512456
beta_shape = [1]
@@ -536,7 +480,7 @@ def _append_optimize_op(self, block, param_and_grad):
536480
inputs={
537481
"Param": param_and_grad[0],
538482
"Grad": param_and_grad[1],
539-
"LearningRate": self._lr,
483+
"LearningRate": self._create_param_lr(param_and_grad),
540484
"Moment": moment,
541485
"InfNorm": inf_norm,
542486
"Beta1Pow": self._beta1_pow_acc

0 commit comments

Comments
 (0)