Commit dd75fbd

Author: chengduo
Merge pull request #9560 from chengduoZH/feature/fix_parallel_exe

Broadcast the gradient once it is generated

2 parents a4e437d + 494bee5, commit dd75fbd

File tree: 1 file changed, +9 -2 lines


paddle/fluid/framework/details/multi_devices_graph_builder.cc (9 additions, 2 deletions)

@@ -55,6 +55,7 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
     const ProgramDesc &program) const {
   auto graph = new SSAGraph();
   SSAGraph &result = *graph;
+  std::unordered_set<std::string> og_has_been_broadcast;
   result.vars_.resize(places_.size());
 
   bool is_forwarding = true;
@@ -122,9 +123,15 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
 
     if (!is_forwarding) {
       auto var_names = op->OutputArgumentNames();
+      // Currently, we assume that once a gradient is generated, it can be
+      // broadcast immediately, and each gradient is broadcast only once.
+      // Other cases, such as adjusting the gradient according to its inputs
+      // when it is generated, are not considered at present.
       for (auto &og : var_names) {
-        if (grad_names_.count(og) != 0) {  // is param grad
-          // Insert NCCL AllReduce Op
+        if (grad_names_.count(og) != 0 &&
+            og_has_been_broadcast.count(og) == 0) {  // is param grad
+          // Insert NCCL AllReduce Op
+          og_has_been_broadcast.insert(og);
 #ifdef PADDLE_WITH_CUDA
           result.ops_.emplace_back(
               new NCCLAllReduceOpHandle(local_scopes_, places_, *nccl_ctxs_));
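
To make the guard's effect concrete, below is a minimal standalone sketch of the same de-duplication pattern in plain C++. It is not Paddle code: ops_outputs, grad_names, and the printed message are illustrative placeholders for op->OutputArgumentNames(), grad_names_, and the insertion of NCCLAllReduceOpHandle; only og_has_been_broadcast mirrors the variable added by this commit.

#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

int main() {
  // Stand-in for the output argument names produced while walking the
  // backward ops of the program; note that "w@GRAD" appears twice.
  std::vector<std::vector<std::string>> ops_outputs = {
      {"w@GRAD", "tmp_0"}, {"b@GRAD"}, {"w@GRAD"}};

  // Names known to be parameter gradients (stand-in for grad_names_).
  std::unordered_set<std::string> grad_names = {"w@GRAD", "b@GRAD"};

  // Tracks gradients that already have an AllReduce op, mirroring
  // og_has_been_broadcast in the patch.
  std::unordered_set<std::string> og_has_been_broadcast;

  for (const auto &var_names : ops_outputs) {
    for (const auto &og : var_names) {
      if (grad_names.count(og) != 0 &&
          og_has_been_broadcast.count(og) == 0) {  // is param grad
        og_has_been_broadcast.insert(og);
        // In the real builder this is where NCCLAllReduceOpHandle is added.
        std::cout << "insert AllReduce for " << og << "\n";
      }
    }
  }
  // Prints one AllReduce per gradient even though w@GRAD is emitted twice.
  return 0;
}

Without the og_has_been_broadcast check, a gradient that appears as the output of more than one backward op would get a second AllReduce inserted; with it, each parameter gradient is broadcast exactly once, as the commit title states.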
