@@ -61,7 +61,7 @@ struct NCCLContext {
61
61
ncclComm_t comm_;
62
62
63
63
// Constructs the context for device id `dev_id`: ctx_ is set to a newly
// allocated CUDADeviceContext built from CUDAPlace(dev_id). The added (+)
// revision below also initializes comm_ to nullptr, so the handle has a
// defined value even when no communicator is assigned to it afterwards.
explicit NCCLContext (int dev_id)
64
- : ctx_(new CUDADeviceContext(CUDAPlace(dev_id))) {}
64
+ : ctx_(new CUDADeviceContext(CUDAPlace(dev_id))), comm_{ nullptr } {}
65
65
66
66
// Returns the cudaStream_t reported by the device context held in ctx_.
cudaStream_t stream () const { return ctx_->stream (); }
67
67
@@ -95,6 +95,7 @@ struct NCCLContextMap {
95
95
std::vector<int > order_;
96
96
97
97
explicit NCCLContextMap (const std::vector<platform::Place> &places) {
98
+ PADDLE_ENFORCE (!places.empty ());
98
99
order_.reserve (places.size ());
99
100
for (auto &p : places) {
100
101
int dev_id = boost::get<CUDAPlace>(p).device ;
@@ -105,15 +106,17 @@ struct NCCLContextMap {
105
106
order_.size (), contexts_.size (),
106
107
" NCCL Context Map does not support contain two or more same device" );
107
108
108
- std::vector<ncclComm_t> comms;
109
- comms.resize (order_.size ());
109
+ if (places.size () > 1 ) {
110
+ std::vector<ncclComm_t> comms;
111
+ comms.resize (order_.size ());
110
112
111
- PADDLE_ENFORCE (platform::dynload::ncclCommInitAll (
112
- &comms[0 ], static_cast <int >(order_.size ()), &order_[0 ]));
113
+ PADDLE_ENFORCE (platform::dynload::ncclCommInitAll (
114
+ &comms[0 ], static_cast <int >(order_.size ()), &order_[0 ]));
113
115
114
- int i = 0 ;
115
- for (auto &dev_id : order_) {
116
- contexts_.at (dev_id).comm_ = comms[i++];
116
+ int i = 0 ;
117
+ for (auto &dev_id : order_) {
118
+ contexts_.at (dev_id).comm_ = comms[i++];
119
+ }
117
120
}
118
121
}
119
122
0 commit comments