@@ -377,11 +377,6 @@ def get_pserver_program(self, endpoint):
         # append it into the sub program.

         global_ops = []
-        # HACK: optimization global ops only used to scale beta1 and beta2
-        # replace it with dependency engine.
-        for op in self.optimize_ops:
-            if self._is_adam_connected_op(op):
-                global_ops.append(op)

         def __append_optimize_op__(op, block, grad_to_block_id, merged_var,
                                    lr_ops):
@@ -1289,22 +1284,16 @@ def _is_op_connected(self, op1, op2):
         # If one op's input is another op's output or
         # one op's output is another op's input, we say
         # the two operator is connected.
-        def _append_inname_remove_beta(varname_list):
+        def _append_inname(varname_list):
             op_input_names = []
             for in_name in varname_list:
-                # HACK: remove beta1 and beta2 to avoid let all
-                # ops connected.
-                if in_name.startswith("beta2_pow_acc") or \
-                        in_name.startswith("beta1_pow_acc"):
-                    continue
-                else:
-                    op_input_names.append(in_name)
+                op_input_names.append(in_name)
             return op_input_names

-        op1_input_names = _append_inname_remove_beta(op1.desc.input_arg_names())
+        op1_input_names = _append_inname(op1.desc.input_arg_names())
         op1_output_names = op1.desc.output_arg_names()

-        op2_input_names = _append_inname_remove_beta(op2.desc.input_arg_names())
+        op2_input_names = _append_inname(op2.desc.input_arg_names())
         op2_output_names = op2.desc.output_arg_names()

         if set(op1_output_names) & set(op2_input_names) or \
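
Note: after this change the connectivity test is a plain set intersection over input/output argument names, with no special casing for Adam's beta accumulators. A minimal standalone sketch of the idea; the OpLike tuple and is_connected helper below are illustrative stand-ins, not the transpiler's API:

from typing import List, NamedTuple


class OpLike(NamedTuple):
    input_arg_names: List[str]
    output_arg_names: List[str]


def is_connected(op1: OpLike, op2: OpLike) -> bool:
    # Two ops are connected when one consumes a variable the other produces.
    return bool(set(op1.output_arg_names) & set(op2.input_arg_names) or
                set(op1.input_arg_names) & set(op2.output_arg_names))


# Example: the sgd op reads the gradient that the sum op writes, so they connect.
sum_op = OpLike(input_arg_names=["x0", "x1"], output_arg_names=["w@GRAD"])
sgd_op = OpLike(input_arg_names=["w", "w@GRAD"], output_arg_names=["w"])
assert is_connected(sum_op, sgd_op)
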
@@ -1413,7 +1402,7 @@ def _is_opt_role_op(self, op):

     def _get_optimize_pass(self):
         """
-        Get optimizer operators, paramters and gradients from origin_program
+        Get optimizer operators, parameters and gradients from origin_program
         Returns:
             opt_ops (list): optimize operators.
             params_grads (dict): paramter->gradient.
@@ -1436,20 +1425,6 @@ def _get_optimize_pass(self):
                             origin_var_dict[param_name],
                             origin_var_dict[input_name]
                         ])
-            elif self._is_adam_connected_op(op):
-                opt_ops.append(op)
             else:
                 pass
         return opt_ops, params_grads
-
-    def _is_adam_connected_op(self, op):
-        """
-        A hack function to determinate whether the input operator
-        is connected to optimize operator.
-        """
-        if op.type == "scale":
-            for in_name in op.input_arg_names:
-                if in_name.startswith("beta1_pow_acc") or \
-                        in_name.startswith("beta2_pow_acc"):
-                    return True
-        return False
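
With _is_adam_connected_op removed, _get_optimize_pass keeps only operators carrying the optimizer role and pairs each parameter with its "@GRAD" gradient input. A rough, self-contained sketch of that filtering; Op, OPT_ROLE, and collect_optimize_ops are hypothetical stand-ins, not the transpiler's real types:

from typing import List, NamedTuple, Tuple

OPT_ROLE = "Optimize"  # assumed role tag; the real code checks op role attributes


class Op(NamedTuple):
    type: str
    role: str
    input_arg_names: List[str]


def collect_optimize_ops(ops: List[Op]) -> Tuple[List[Op], List[Tuple[str, str]]]:
    opt_ops, params_grads = [], []
    for op in ops:
        if op.role != OPT_ROLE:
            continue  # in this sketch, ops without the optimizer role are ignored
        opt_ops.append(op)
        for in_name in op.input_arg_names:
            if in_name.endswith("@GRAD"):
                # pair the gradient with the parameter it updates
                params_grads.append((in_name[:-len("@GRAD")], in_name))
    return opt_ops, params_grads


ops = [Op("sgd", OPT_ROLE, ["w", "w@GRAD"]),
       Op("scale", "Forward", ["beta1_pow_acc"])]
opt_ops, params_grads = collect_optimize_ops(ops)
assert [op.type for op in opt_ops] == ["sgd"]
assert params_grads == [("w", "w@GRAD")]
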