Skip to content

Commit aa9f516

Browse files
committed
Refine code, add comments, and fix some naming problems
1 parent d32c7a6 commit aa9f516

File tree

5 files changed

+52
-35
lines changed

5 files changed

+52
-35
lines changed

paddle/pserver/ParameterServer2Main.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,10 @@ using namespace paddle; // NOLINT
2020
int main(int argc, char** argv) {
2121
initMain(argc, argv);
2222

23-
std::unique_ptr<ParameterServerController> pServerPtr(
24-
paddle::ParameterServerController::createByGflags());
25-
pServerPtr->start();
26-
pServerPtr->join();
23+
std::unique_ptr<ParameterServerController> parameterServerPtr(
24+
paddle::ParameterServerController::createFromGflags());
25+
parameterServerPtr->start();
26+
parameterServerPtr->wait();
2727

2828
return 0;
2929
}

paddle/pserver/ParameterServerController.cpp

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -25,43 +25,44 @@ ParameterServerController::ParameterServerController(
2525
int numPorts = config.ports_num() + config.ports_num_for_sparse();
2626

2727
if (config.nics().empty()) {
28-
pservers_.resize(numPorts);
28+
parameterServers_.resize(numPorts);
2929
for (int i = 0; i < numPorts; ++i) {
3030
if (config.rdma_tcp() == "rdma") {
31-
pservers_[i].reset(
31+
parameterServers_[i].reset(
3232
new ParameterServer2(std::string(), config.port() + i, rdmaCpu++));
3333
rdmaCpu = rdmaCpu % onlineCpus;
3434
} else {
35-
pservers_[i].reset(
35+
parameterServers_[i].reset(
3636
new ParameterServer2(std::string(), config.port() + i));
3737
}
38-
CHECK(pservers_[i]->init()) << "Fail to initialize parameter server"
39-
<< config.port() + i;
38+
CHECK(parameterServers_[i]->init()) << "Fail to initialize parameter "
39+
"server on port "
40+
<< config.port() + i;
4041
}
4142
} else {
4243
str::split(config.nics(), ',', &devices);
43-
pservers_.resize(devices.size() * numPorts);
44+
parameterServers_.resize(devices.size() * numPorts);
4445
for (int i = 0; i < numPorts; ++i) {
4546
for (size_t j = 0; j < devices.size(); ++j) {
4647
if (config.rdma_tcp() == "rdma") {
47-
pservers_[i * devices.size() + j].reset(new ParameterServer2(
48+
parameterServers_[i * devices.size() + j].reset(new ParameterServer2(
4849
getIpAddr(devices[j]), config.port() + i, rdmaCpu++));
4950
rdmaCpu = rdmaCpu % onlineCpus;
5051
} else {
51-
pservers_[i * devices.size() + j].reset(
52+
parameterServers_[i * devices.size() + j].reset(
5253
new ParameterServer2(getIpAddr(devices[j]), config.port() + i));
5354
}
54-
CHECK(pservers_[i * devices.size() + j]->init())
55-
<< "Fail to initialize parameter server" << devices[j]
55+
CHECK(parameterServers_[i * devices.size() + j]->init())
56+
<< "Fail to initialize parameter server with device " << devices[j]
5657
<< config.port() + i;
5758
}
5859
}
5960
}
6061
}
6162

62-
ParameterServerController::~ParameterServerController() { this->join(); }
63+
ParameterServerController::~ParameterServerController() { this->wait(); }
6364

64-
ParameterServerController* ParameterServerController::createByGflags() {
65+
ParameterServerController* ParameterServerController::createFromGflags() {
6566
ParameterServerConfig config;
6667

6768
config.set_nics(FLAGS_nics);
@@ -79,21 +80,21 @@ ParameterServerController* ParameterServerController::create(
7980
}
8081

8182
void ParameterServerController::start() {
82-
LOG(INFO) << "pserver sizes : " << pservers_.size();
83+
LOG(INFO) << "number of parameterServer instances: "
84+
<< parameterServers_.size();
8385
int i = 0;
84-
for (const auto& pserver : pservers_) {
85-
LOG(INFO) << "pserver started : " << i;
86-
pserver->start();
86+
for (const auto& parameterServer : parameterServers_) {
87+
LOG(INFO) << "Starting parameterServer[" << i << "]";
88+
parameterServer->start();
8789
i++;
8890
}
8991
}
9092

91-
void ParameterServerController::join() {
92-
LOG(INFO) << "pserver sizes : " << pservers_.size();
93+
void ParameterServerController::wait() {
9394
int i = 0;
94-
for (const auto& pserver : pservers_) {
95-
LOG(INFO) << "pserver join : " << i;
96-
pserver->join();
95+
for (const auto& parameterServer : parameterServers_) {
96+
LOG(INFO) << "Waiting parameterServer[" << i << "]";
97+
parameterServer->join();
9798
i++;
9899
}
99100
}

paddle/pserver/ParameterServerController.h

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@ limitations under the License. */
2121

2222
namespace paddle {
2323

24+
/**
25+
* @brief ParameterServerController is used to create, initialize and manage multiple
26+
* parameter server instances. The number of instances is determined by the port
27+
* count (the number of ports used for sending parameters) and the network devices configured
28+
* by gflags or proto.
29+
*/
2430
class ParameterServerController final {
2531
public:
2632
DISABLE_COPY(ParameterServerController);
@@ -39,28 +45,30 @@ class ParameterServerController final {
3945
* @brief create ParameterServerController from gflags, this is used for
4046
* compatibility with the old usage of configuration by gflags.
4147
*/
42-
static ParameterServerController* createByGflags();
48+
static ParameterServerController* createFromGflags();
4349

4450
/**
4551
* @brief create ParameterServerController with ParameterServerConfig, remove
46-
* gflags from ParameterServer. Init all pservers thread according to the
47-
* config.
52+
* gflags from ParameterServer. Init all ParameterServer2 instances according
53+
* to
54+
* the config.
4855
*/
4956
static ParameterServerController* create(const ParameterServerConfig& config);
5057

5158
/**
52-
* @brief start all pserver thread in this ParameterServerController.
59+
* @brief start all ParameterServer2 instances in this
60+
* ParameterServerController.
5361
*/
5462
void start();
5563

5664
/**
57-
* @brief join and wait for all pserver thread in this
65+
* @brief join and wait for the threads of all ParameterServer2 instances in this
5866
* ParameterServerController.
5967
*/
60-
void join();
68+
void wait();
6169

6270
private:
63-
std::vector<std::unique_ptr<ParameterServer2>> pservers_;
71+
std::vector<std::unique_ptr<ParameterServer2>> parameterServers_;
6472
};
6573

6674
} // namespace paddle

paddle/trainer/TrainerMain.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,11 @@ int main(int argc, char** argv) {
3636
initMain(argc, argv);
3737
initPython(argc, argv);
3838

39-
std::unique_ptr<ParameterServerController> pServerPtr(nullptr);
39+
std::unique_ptr<ParameterServerController> parameterServerPtr(nullptr);
4040
if (FLAGS_start_pserver) {
41-
pServerPtr.reset(paddle::ParameterServerController::createByGflags());
42-
pServerPtr->start();
41+
parameterServerPtr.reset(
42+
paddle::ParameterServerController::createFromGflags());
43+
parameterServerPtr->start();
4344
}
4445
Trainer trainer;
4546
auto config = TrainerConfigHelper::createFromFlags();

proto/ParameterServerConfig.proto

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,17 @@ syntax = "proto2";
1515

1616
package paddle;
1717

18+
19+
/**
20+
* Configuration structure for ParameterClient2.
21+
*/
1822
message ParameterClientConfig {
1923
required int32 trainer_id = 1;
2024
}
2125

26+
/**
27+
* Configuration structure for ParameterServer2.
28+
*/
2229
message ParameterServerConfig {
2330
// The ports number for parameter send,
2431
// increment based on default port number

0 commit comments

Comments
 (0)