
Commit a4d68ed

Add lock

1 parent: d054cfe

File tree

1 file changed (+6, -0)

paddle/fluid/operators/nccl/nccl_gpu_common.cc

Lines changed: 6 additions & 0 deletions
@@ -23,13 +23,18 @@ std::unique_ptr<std::vector<ncclComm_t>> global_comms;
 std::unique_ptr<std::unordered_map<int, int>> comm_id_map;
 bool inited = false;
 size_t last_num_gpus = -1;
+// TODO(panyx0718): Need to decide whether Paddle supports parallel
+// runs with different number GPUs. If true, current solution is not enough.
+std::mutex comm_mu;
 }
 
 int Communicator::GetCommId(int device_id) const {
+  std::lock_guard<std::mutex> guard(comm_mu);
   return comm_id_map->at(device_id);
 }
 
 void Communicator::InitAll(const std::vector<int>& gpus) {
+  std::lock_guard<std::mutex> guard(comm_mu);
   if (inited && last_num_gpus == gpus.size()) {
     return;
   }
@@ -52,6 +57,7 @@ void Communicator::InitAll(const std::vector<int>& gpus) {
 }
 
 const std::vector<ncclComm_t>& Communicator::comms() const {
+  std::lock_guard<std::mutex> guard(comm_mu);
   return *global_comms;
 }
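
For context, the change wraps every access to the file-scope communicator state in a std::lock_guard on a single file-scope mutex, so concurrent callers serialize on initialization and lookups. Below is a minimal, self-contained sketch of that same pattern; the names (global_state, last_size, state_mu, InitAll, state) are hypothetical stand-ins, not Paddle's actual code.

    // Sketch of the pattern applied by this commit:
    // one file-scope mutex guards all file-scope state.
    #include <cstddef>
    #include <mutex>
    #include <vector>

    namespace {
    std::vector<int> global_state;  // stands in for global_comms / comm_id_map
    bool inited = false;
    size_t last_size = static_cast<size_t>(-1);
    std::mutex state_mu;            // stands in for comm_mu
    }  // namespace

    void InitAll(const std::vector<int>& gpus) {
      // lock_guard acquires here and releases at every scope exit,
      // including the early return below.
      std::lock_guard<std::mutex> guard(state_mu);
      if (inited && last_size == gpus.size()) {
        return;  // already initialized for this GPU count
      }
      global_state = gpus;
      last_size = gpus.size();
      inited = true;
    }

    const std::vector<int>& state() {
      std::lock_guard<std::mutex> guard(state_mu);
      // Only the lookup is protected; the returned reference is used
      // after the lock is released, just as in comms() above.
      return global_state;
    }

One point worth noting: GetCommId and comms return their results by value-lookup or reference after the guard releases, so the lock protects the access itself but not later use of the returned data. That is consistent with the TODO's caveat that the current solution may not be enough if Paddle must support parallel runs with different GPU counts.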
