Skip to content

Commit fbe5624

Browse files
authored
Merge pull request #9994 from reyoung/feature/debug
Fix bugs in local_scopes
2 parents ee9832a + 06fb055 commit fbe5624

File tree

3 files changed

+22
-13
lines changed

3 files changed

+22
-13
lines changed

paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,9 @@ void NCCLAllReduceOpHandle::RunImpl() {

     for (size_t i = 0; i < local_scopes_.size(); ++i) {
       auto *s = local_scopes_[i];
+      auto &local_scope = *s->FindVar(kLocalExecScopeName)->Get<Scope *>();

-      auto &lod_tensor = s->FindVar(var_name)->Get<LoDTensor>();
+      auto &lod_tensor = local_scope.FindVar(var_name)->Get<LoDTensor>();
       lod_tensors.emplace_back(lod_tensor);
     }

@@ -110,17 +111,21 @@ void NCCLAllReduceOpHandle::RunImpl() {
         }
       });
     } else {  // Special handle CPU only Operator's gradient. Like CRF
-      auto &trg =
-          *this->local_scopes_[0]->Var()->GetMutable<framework::LoDTensor>();
+      auto &trg = *this->local_scopes_[0]
+                       ->FindVar(kLocalExecScopeName)
+                       ->Get<Scope *>()
+                       ->Var()
+                       ->GetMutable<framework::LoDTensor>();

       // Reduce All Tensor to trg in CPU
       ReduceLoDTensor func(lod_tensors, &trg);
       VisitDataType(ToDataType(lod_tensors[0].type()), func);

       for (size_t i = 0; i < local_scopes_.size(); ++i) {
-        auto &scope = local_scopes_[i];
+        auto &scope =
+            *local_scopes_[i]->FindVar(kLocalExecScopeName)->Get<Scope *>();
         auto &p = places_[i];
-        auto *var = scope->FindVar(var_name);
+        auto *var = scope.FindVar(var_name);
         auto *dev_ctx = dev_ctxes_[p];

         RunAndRecordEvent(p, [&trg, var, dev_ctx, p] {

paddle/fluid/framework/details/scale_loss_grad_op_handle.cc

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,11 @@ ScaleLossGradOpHandle::~ScaleLossGradOpHandle() {}

 void ScaleLossGradOpHandle::RunImpl() {
   std::string var_name = static_cast<VarHandle *>(this->outputs_[0])->name_;
+  auto &local_scope = *scope_->FindVar(kLocalExecScopeName)->Get<Scope *>();

-  float *tmp =
-      scope_->FindVar(var_name)->GetMutable<LoDTensor>()->mutable_data<float>(
-          make_ddim({1}), place_);
+  float *tmp = local_scope.FindVar(var_name)
+                   ->GetMutable<LoDTensor>()
+                   ->mutable_data<float>(make_ddim({1}), place_);

   if (platform::is_cpu_place(place_)) {
     *tmp = coeff_;

paddle/fluid/framework/parallel_executor.cc

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,14 @@ ParallelExecutor::ParallelExecutor(
   // Step 1. Bcast the params to devs.
   // Create local scopes
   if (local_scopes.empty()) {
-    for (size_t i = 0; i < member_->places_.size(); ++i) {
-      member_->local_scopes_.push_back(&scope->NewScope());
+    member_->local_scopes_.emplace_back(member_->global_scope_);
+    for (size_t i = 1; i < member_->places_.size(); ++i) {
+      member_->local_scopes_.emplace_back(&scope->NewScope());
     }
   } else {
     PADDLE_ENFORCE_EQ(member_->places_.size(), local_scopes.size());
     for (size_t i = 0; i < member_->places_.size(); ++i) {
-      member_->local_scopes_.push_back(local_scopes[i]);
+      member_->local_scopes_.emplace_back(local_scopes[i]);
     }
   }

@@ -159,7 +160,9 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
                            const std::string &fetched_var_name) {
   platform::RecordBlock b(0);
   // Create local scopes.
-  for (auto &scope : member_->local_scopes_) {
+  for (auto it = member_->local_scopes_.rbegin();
+       it != member_->local_scopes_.rend(); ++it) {
+    auto &scope = *it;
     Scope &local_scope = scope->NewScope();
     *scope->Var(details::kLocalExecScopeName)->GetMutable<Scope *>() =
         &local_scope;
@@ -173,7 +176,7 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
         InitializeVariable(scope->Var(std::get<0>(name_type_pair)),
                            std::get<1>(name_type_pair));
       } else {
-        InitializeVariable(scope->Var(std::get<0>(name_type_pair)),
+        InitializeVariable(local_scope.Var(std::get<0>(name_type_pair)),
                           std::get<1>(name_type_pair));
       }
     }

0 commit comments

Comments
 (0)