Commit ffa88c3

Author: lilong12
fix the bug of all_reduce pipeline gradient multiple times (#30437) (#30595)
* update, test=develop
1 parent: d15e73b

File tree

1 file changed: +4 -0


python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py

Lines changed: 4 additions & 0 deletions
@@ -233,6 +233,7 @@ def _insert_allreduce_ops(self, ring_id):
         block = self.main_program_list[ring_id - 1]['program'].global_block()
         origin_block = self.main_program.global_block()
         grad = None
+        processed_param_name = set()
         for idx, op in reversed(list(enumerate(block.ops))):
             if is_backward_op(op) and \
                     OP_ROLE_VAR_KEY in op.attr_names:
@@ -242,7 +243,10 @@ def _insert_allreduce_ops(self, ring_id):
                 assert len(op_role_var) % 2 == 0
                 offset = idx
                 for i in range(0, len(op_role_var), 2):
+                    param_name = op_role_var[i]
                     param = block.vars[op_role_var[i]]
+                    if param_name in processed_param_name: continue
+                    processed_param_name.add(param_name)
                     grad = block.vars[op_role_var[i + 1]]
                     origin_param = origin_block.vars[op_role_var[i]]
                     if origin_param.is_distributed:
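
The fix in context: the same parameter can appear in the OP_ROLE_VAR_KEY attribute of more than one backward op, so the old loop inserted an allreduce for that parameter's gradient once per occurrence, scaling the gradient by the number of duplicates. The sketch below is a standalone illustration of the dedup guard, not PaddlePaddle code; collect_allreduce_targets and the fc_0.w_0 sample names are hypothetical.

    def collect_allreduce_targets(op_role_vars):
        # op_role_vars is a flat [param0, grad0, param1, grad1, ...] list,
        # mirroring the layout the optimizer walks in steps of 2.
        processed_param_name = set()  # the guard this commit introduces
        targets = []
        for i in range(0, len(op_role_vars), 2):
            param_name = op_role_vars[i]
            if param_name in processed_param_name:
                continue  # duplicate: this gradient is already scheduled
            processed_param_name.add(param_name)
            targets.append((param_name, op_role_vars[i + 1]))
        return targets

    # Without the guard, fc_0.w_0@GRAD would be all-reduced twice,
    # effectively doubling that gradient across pipeline stages.
    print(collect_allreduce_targets(
        ['fc_0.w_0', 'fc_0.w_0@GRAD', 'fc_0.w_0', 'fc_0.w_0@GRAD']))
    # -> [('fc_0.w_0', 'fc_0.w_0@GRAD')]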
