@@ -73,8 +73,9 @@ void NCCLAllReduceOpHandle::RunImpl() {
 
     for (size_t i = 0; i < local_scopes_.size(); ++i) {
       auto *s = local_scopes_[i];
+      auto &local_scope = *s->FindVar(kLocalExecScopeName)->Get<Scope *>();
 
-      auto &lod_tensor = s->FindVar(var_name)->Get<LoDTensor>();
+      auto &lod_tensor = local_scope.FindVar(var_name)->Get<LoDTensor>();
       lod_tensors.emplace_back(lod_tensor);
     }
 
@@ -110,17 +111,21 @@ void NCCLAllReduceOpHandle::RunImpl() {
         }
       });
     } else {  // Special handle CPU only Operator's gradient. Like CRF
-      auto &trg =
-          *this->local_scopes_[0]->Var()->GetMutable<framework::LoDTensor>();
+      auto &trg = *this->local_scopes_[0]
+                       ->FindVar(kLocalExecScopeName)
+                       ->Get<Scope *>()
+                       ->Var()
+                       ->GetMutable<framework::LoDTensor>();
 
       // Reduce All Tensor to trg in CPU
       ReduceLoDTensor func(lod_tensors, &trg);
       VisitDataType(ToDataType(lod_tensors[0].type()), func);
 
       for (size_t i = 0; i < local_scopes_.size(); ++i) {
-        auto &scope = local_scopes_[i];
+        auto &scope =
+            *local_scopes_[i]->FindVar(kLocalExecScopeName)->Get<Scope *>();
         auto &p = places_[i];
-        auto *var = scope->FindVar(var_name);
+        auto *var = scope.FindVar(var_name);
         auto *dev_ctx = dev_ctxes_[p];
 
         RunAndRecordEvent(p, [&trg, var, dev_ctx, p] {
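
For reference, a minimal sketch of the lookup that both hunks now repeat inline: each entry of local_scopes_ is a top-level Scope that stores, under the key kLocalExecScopeName, a pointer to the child scope used for the current execution, and the handle now reads and creates variables in that child scope instead of the top-level one. The helper name LocalExecScope below is illustrative and not part of this change, and the include of op_handle_base.h is assumed to be where kLocalExecScopeName is declared.

// Illustrative only -- not introduced by this diff; it just names the
// expression the change writes out inline in both hunks.
#include "paddle/fluid/framework/details/op_handle_base.h"  // assumed declaration site of kLocalExecScopeName
#include "paddle/fluid/framework/scope.h"

namespace paddle {
namespace framework {
namespace details {

// Resolve the per-execution child scope stored inside a top-level local scope.
inline Scope &LocalExecScope(Scope *top_level_scope) {
  // FindVar returns the Variable holding a Scope*; Get<Scope *> unwraps it.
  return *top_level_scope->FindVar(kLocalExecScopeName)->Get<Scope *>();
}

}  // namespace details
}  // namespace framework
}  // namespace paddle

With such a helper, the first hunk's body could read
auto &lod_tensor = LocalExecScope(s).FindVar(var_name)->Get<LoDTensor>();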