@@ -314,25 +314,23 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
314
314
member_->use_all_reduce_ =
315
315
build_strategy.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce ;
316
316
member_->nranks_ = build_strategy.num_trainers_ * places.size ();
317
+ if (!member_->use_all_reduce_ && member_->nranks_ == 1 ) {
318
+ LOG (INFO) << " If you set build_strategy.reduce with 'Reduce',"
319
+ " the number of places should be greater than 1." ;
320
+ member_->build_strategy_ .reduce_ =
321
+ BuildStrategy::ReduceStrategy::kAllReduce ;
322
+ member_->use_all_reduce_ = true ;
323
+ }
317
324
#if defined(PADDLE_WITH_CUDA) && defined(_WIN32)
318
325
if (member_->use_cuda_ ) {
319
326
PADDLE_ENFORCE (places.size () == 1 , " Windows can support Single GPU only." );
320
327
}
321
328
#endif
322
- if (!member_->use_all_reduce_ ) {
323
- if (places.size () == 1 ) {
324
- LOG (INFO) << " If you set build_strategy.reduce with 'Reduce',"
325
- " the number of places should be greater than 1." ;
326
- member_->use_all_reduce_ = true ;
327
- }
328
- }
329
-
330
329
LOG (INFO) << string::Sprintf (
331
330
" The number of %s, which is used in ParallelExecutor, is %lu. And "
332
331
" the Program will be copied %lu copies" ,
333
332
(member_->use_cuda_ ? " CUDAPlace" : " CPUPlace" ), places.size (),
334
333
places.size ());
335
-
336
334
// Step 1. Bcast the bcast_vars to devs.
337
335
// Create local scopes
338
336
if (local_scopes.empty ()) {
0 commit comments