Commit 7b0c027

update by comments
1 parent 928418a commit 7b0c027

File tree

    paddle/fluid/framework/parallel_executor.h
    paddle/fluid/operators/gen_nccl_id_op.cc
    paddle/fluid/platform/nccl_helper.h
    python/paddle/fluid/parallel_executor.py

4 files changed: +17 −17 lines changed

paddle/fluid/framework/parallel_executor.h
Lines changed: 1 addition & 1 deletion

```diff
@@ -42,7 +42,7 @@ class ParallelExecutor {
                     const std::vector<Scope*>& local_scopes,
                     bool allow_op_delay, bool use_default_grad_scale,
                     bool balance_parameter_opt_between_cards,
-                    size_t num_trainers = 0, size_t trainer_id = 0);
+                    size_t num_trainers = 1, size_t trainer_id = 0);
 
   ~ParallelExecutor();
 
```
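As a side note on the header change above, here is a minimal sketch (hypothetical names, not Paddle's actual API) of why a default of `num_trainers = 1` is the more natural "single trainer" reading: with the old default of 0, the usual invariant `trainer_id < num_trainers` cannot hold even for a purely local run.

```cpp
#include <cassert>
#include <cstddef>

// Hypothetical stand-in for a constructor taking the two new parameters.
void CreateExecutor(std::size_t num_trainers = 1, std::size_t trainer_id = 0) {
  assert(num_trainers >= 1);           // zero trainers is never meaningful
  assert(trainer_id < num_trainers);   // would fail under the old default of 0
}

int main() {
  CreateExecutor();      // single-node run: the defaults are consistent
  CreateExecutor(4, 2);  // distributed run: 4 trainers, this one is trainer 2
}
```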

paddle/fluid/operators/gen_nccl_id_op.cc
Lines changed: 12 additions & 12 deletions

```diff
@@ -75,29 +75,29 @@ class GenNCCLIdOp : public framework::OperatorBase {
     // NOTE: Can not use unique_ptr here because the default
     // deleter will call GRPC Server's base class's dtor and
     // that will cause a wired crash.
-    rpc_service_ = new detail::AsyncGRPCServer(endpoint, true);
+
+    detail::AsyncGRPCServer rpc_service(endpoint, true);
     framework::ProgramDesc empty_program;
     framework::Executor executor(dev_ctx.GetPlace());
-    rpc_service_->SetScope(scope);
-    rpc_service_->SetDevCtx(&dev_ctx);
-    rpc_service_->SetProgram(&empty_program);
-    rpc_service_->SetExecutor(&executor);
+    rpc_service.SetScope(scope);
+    rpc_service.SetDevCtx(&dev_ctx);
+    rpc_service.SetProgram(&empty_program);
+    rpc_service.SetExecutor(&executor);
 
     std::thread server_thread(
-        std::bind(&detail::AsyncGRPCServer::RunSyncUpdate, rpc_service_));
-    rpc_service_->SetCond(0);
+        std::bind(&detail::AsyncGRPCServer::RunSyncUpdate, &rpc_service));
+    rpc_service.SetCond(0);
     VLOG(3) << "start getting nccl id from trainer 0...";
-    auto recv = rpc_service_->Get();
+    auto recv = rpc_service.Get();
     VLOG(3) << "got nccl id and stop server...";
-    rpc_service_->ShutDown();
+    rpc_service.ShutDown();
     VLOG(3) << "rpc server stopped";
     // TODO(wuyi): reinit nccl communicators
     server_thread.join();
-    delete rpc_service_;
   }
 
- protected:
-  mutable detail::AsyncGRPCServer* rpc_service_ = nullptr;
+  // protected:
+  //   mutable detail::AsyncGRPCServer* rpc_service_ = nullptr;
 };
 
 class GenNCCLIdOpMaker : public framework::OpProtoAndCheckerMaker {
```
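The main change in this file is lifetime management: the heap-allocated `rpc_service_` member (created with `new` inside the run method and released with a trailing manual `delete`) becomes a function-local `rpc_service` object, so its destructor runs automatically when the scope ends and the explicit `delete` disappears. A minimal sketch of that pattern, using a hypothetical `Server` type rather than Paddle's `detail::AsyncGRPCServer`:

```cpp
#include <iostream>
#include <string>
#include <thread>
#include <utility>

// Hypothetical server; only the construction/destruction behaviour matters here.
struct Server {
  explicit Server(std::string endpoint) : endpoint_(std::move(endpoint)) {
    std::cout << "server up on " << endpoint_ << "\n";
  }
  ~Server() { std::cout << "server destroyed\n"; }  // runs at scope exit
  void Run() {}  // stand-in; a real server would block here serving requests
  void ShutDown() { std::cout << "server shut down\n"; }
  std::string endpoint_;
};

void GenId(const std::string& endpoint) {
  Server server(endpoint);                       // stack object: no new/delete
  std::thread worker([&server] { server.Run(); });
  // ... in the real op, the NCCL id would be received here ...
  server.ShutDown();
  worker.join();
}  // ~Server() runs here automatically, even if the function returns early

int main() { GenId("127.0.0.1:6174"); }
```

Whether this is safe for the actual `AsyncGRPCServer` depends on the destructor issue mentioned in the NOTE comment kept above; the sketch only illustrates the scoping pattern.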

paddle/fluid/platform/nccl_helper.h
Lines changed: 2 additions & 2 deletions

```diff
@@ -78,7 +78,7 @@ struct NCCLContextMap {
 
   explicit NCCLContextMap(const std::vector<platform::Place> &places,
                           ncclUniqueId *nccl_id = nullptr,
-                          size_t num_trainers = 0, size_t trainer_id = 0) {
+                          size_t num_trainers = 1, size_t trainer_id = 0) {
     PADDLE_ENFORCE(!places.empty());
     order_.reserve(places.size());
     for (auto &p : places) {
@@ -100,7 +100,7 @@ struct NCCLContextMap {
       PADDLE_ENFORCE(platform::dynload::ncclCommInitAll(
           comms.get(), static_cast<int>(order_.size()), order_.data()));
     } else {
-      PADDLE_ENFORCE_GT(num_trainers, 0);
+      PADDLE_ENFORCE_GT(num_trainers, 1);
       // TODO(wuyi): need to ensure each node have same number of GPUs
       {
         int nranks = num_trainers * order_.size();
```
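The `else` branch above is the multi-node path (the first branch does a plain local `ncclCommInitAll`), and with the new default it can insist on `num_trainers > 1`. A rough sketch of the implied setup in plain C++ follows; it makes no real NCCL calls, and both the `rank = trainer_id * num_gpus + i` layout and the equal-GPUs-per-node requirement are assumptions inferred from this excerpt rather than confirmed by it.

```cpp
#include <cstddef>
#include <iostream>
#include <stdexcept>

// Rough sketch of the rank layout for multi-node NCCL initialization.
void InitComms(std::size_t num_gpus, std::size_t num_trainers = 1,
               std::size_t trainer_id = 0) {
  if (num_trainers == 1) {
    // single node: one communicator per local GPU (ncclCommInitAll-style)
    std::cout << "local init over " << num_gpus << " GPUs\n";
    return;
  }
  if (num_trainers < 1) throw std::runtime_error("num_trainers must be >= 1");
  // multi-node: every node is assumed to contribute the same number of GPUs
  const std::size_t nranks = num_trainers * num_gpus;
  for (std::size_t i = 0; i < num_gpus; ++i) {
    const std::size_t rank = trainer_id * num_gpus + i;  // global NCCL rank
    std::cout << "local GPU " << i << " -> rank " << rank
              << " of " << nranks << "\n";
  }
}

int main() {
  InitComms(/*num_gpus=*/2);                                        // single node
  InitComms(/*num_gpus=*/2, /*num_trainers=*/3, /*trainer_id=*/1);  // node 1 of 3
}
```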

python/paddle/fluid/parallel_executor.py
Lines changed: 2 additions & 2 deletions

```diff
@@ -32,7 +32,7 @@ def __init__(self,
                  share_vars_from=None,
                  use_default_grad_scale=True,
                  balance_parameter_opt_between_cards=False,
-                 num_trainers=0,
+                 num_trainers=1,
                  trainer_id=0):
         """
         ParallelExecutor can run program in parallel.
@@ -57,7 +57,7 @@ def __init__(self,
             balance_parameter_opt_between_cards(bool, default True): Whether
                 updating different gradients on different cards. Currently, it
                 is not recommended.
-            num_trainers(int, default 0): If greater than 0, NCCL will be
+            num_trainers(int, default 1): If greater than 1, NCCL will be
                 initialized with multpile rank of nodes, each node should have
                 same number of GPUs. Distributed training will be enabled then.
             trainer_id(int, default 0): Must use together with num_trainers.
```
