File tree Expand file tree Collapse file tree 1 file changed +6
-0
lines changed
paddle/fluid/operators/nccl Expand file tree Collapse file tree 1 file changed +6
-0
lines changed Original file line number Diff line number Diff line change @@ -23,13 +23,18 @@ std::unique_ptr<std::vector<ncclComm_t>> global_comms;
23
23
std::unique_ptr<std::unordered_map<int , int >> comm_id_map;
24
24
bool inited = false ;
25
25
size_t last_num_gpus = -1 ;
26
+ // TODO(panyx0718): Need to decide whether Paddle supports parallel
27
+ // runs with different number GPUs. If true, current solution is not enough.
28
+ std::mutex comm_mu;
26
29
}
27
30
28
31
// Returns the communicator id registered for `device_id`.
// Locks comm_mu to serialize with InitAll(), which rebuilds
// comm_id_map when invoked with a different number of GPUs
// (see the TODO about parallel runs with differing GPU counts).
int Communicator::GetCommId (int device_id) const {
  std::lock_guard<std::mutex> guard (comm_mu);
  // at() throws std::out_of_range if device_id was never registered
  // by InitAll() -- presumably callers only pass devices they passed
  // to InitAll(); verify against call sites.
  return comm_id_map->at (device_id);
}
31
35
32
36
void Communicator::InitAll (const std::vector<int >& gpus) {
37
+ std::lock_guard<std::mutex> guard (comm_mu);
33
38
if (inited && last_num_gpus == gpus.size ()) {
34
39
return ;
35
40
}
@@ -52,6 +57,7 @@ void Communicator::InitAll(const std::vector<int>& gpus) {
52
57
}
53
58
54
59
// Returns the process-wide list of NCCL communicators built by InitAll().
// comm_mu guards the dereference of the global_comms pointer against a
// concurrent InitAll() replacing it.
const std::vector<ncclComm_t>& Communicator::comms () const {
  std::lock_guard<std::mutex> guard (comm_mu);
  // NOTE(review): the returned reference outlives the lock's scope, so a
  // caller racing with a later InitAll() (different GPU count) could still
  // observe the vector being replaced. Safe only if initialization
  // happens-before all readers -- confirm with callers.
  return *global_comms;
}
57
63
You can’t perform that action at this time.
0 commit comments