Skip to content

Commit e0c8397

Browse files
authored
Merge pull request #14257 from jacquesqiao/optimize-pserver-profiler-thread-pool
clean rpc server profiler
2 parents ffc8661 + 3b8dd9e commit e0c8397

File tree

9 files changed: 8 additions and 56 deletions.

paddle/fluid/operators/distributed/grpc_variable_response.cc

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -286,10 +286,10 @@ int GRPCVariableResponse::Parse(Source* source) {
286286
platform::EnableProfiler(platform::ProfilerState::kCPU);
287287
} else if (profiling == platform::kDisableProfiler &&
288288
platform::IsProfileEnabled()) {
289-
// TODO(panyx0718): Should we allow to customize file dir.
290289
platform::DisableProfiler(
291290
platform::EventSortingKey::kDefault,
292-
string::Sprintf("/tmp/profile_ps_%lld", listener_id));
291+
string::Sprintf("%s_%lld", FLAGS_rpc_server_profile_path,
292+
listener_id));
293293
}
294294
break;
295295
}

paddle/fluid/operators/distributed/request_handler_impl.cc

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,6 @@ bool RequestSendHandler::Handle(const std::string& varname,
5151
// Async
5252
if (!sync_mode_) {
5353
VLOG(3) << "async process var: " << varname;
54-
rpc_server_->Profiler().OneStep();
5554
try {
5655
executor_->RunPreparedContext((*grad_to_prepared_ctx_)[varname].get(),
5756
scope);

paddle/fluid/operators/distributed/rpc_server.cc

Lines changed: 0 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -20,42 +20,10 @@
2020
#include "paddle/fluid/operators/distributed/rpc_server.h"
2121
#include "paddle/fluid/platform/profiler.h"
2222

23-
DEFINE_int32(rpc_server_profile_period, 0,
24-
"the period of listen_and_serv to do profile");
25-
DEFINE_string(rpc_server_profile_path, "/dev/null",
26-
"the profile log file path");
27-
2823
namespace paddle {
2924
namespace operators {
3025
namespace distributed {
3126

32-
RPCServerProfiler::RPCServerProfiler(int profile_period,
33-
const std::string& profile_log_path)
34-
: profile_period_(profile_period), profile_log_path_(profile_log_path) {
35-
step_ = 0;
36-
}
37-
38-
void RPCServerProfiler::OneStep() {
39-
PADDLE_ENFORCE_LE(step_, profile_period_,
40-
"step_ should not be larger then "
41-
"profile_period_");
42-
if (profile_period_ <= 0) {
43-
return;
44-
}
45-
46-
if (step_ == 0) {
47-
auto pf_state = paddle::platform::ProfilerState::kCPU;
48-
paddle::platform::EnableProfiler(pf_state);
49-
}
50-
if (step_ == profile_period_) {
51-
paddle::platform::DisableProfiler(paddle::platform::EventSortingKey::kTotal,
52-
profile_log_path_);
53-
step_ = 0;
54-
} else {
55-
step_++;
56-
}
57-
}
58-
5927
void RPCServer::ShutDown() {
6028
LOG(INFO) << "RPCServer ShutDown ";
6129
ShutDownImpl();

paddle/fluid/operators/distributed/rpc_server.h

Lines changed: 0 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -23,30 +23,14 @@
2323

2424
#include "paddle/fluid/operators/distributed/request_handler.h"
2525

26-
DECLARE_int32(rpc_server_profile_period);
27-
DECLARE_string(rpc_server_profile_path);
28-
2926
namespace paddle {
3027
namespace operators {
3128
namespace distributed {
3229

33-
class RPCServerProfiler {
34-
public:
35-
RPCServerProfiler(int profile_period, const std::string& profile_log_path);
36-
void OneStep();
37-
38-
private:
39-
const int profile_period_;
40-
std::string profile_log_path_;
41-
int step_;
42-
};
43-
4430
class RPCServer {
4531
public:
4632
explicit RPCServer(const std::string& address, int client_num)
4733
: cur_cond_(0),
48-
profiler_(FLAGS_rpc_server_profile_period,
49-
FLAGS_rpc_server_profile_path),
5034
bind_address_(address),
5135
exit_flag_(false),
5236
selected_port_(0),
@@ -86,7 +70,6 @@ class RPCServer {
8670
void Complete();
8771

8872
void ResetBarrierCounter();
89-
RPCServerProfiler& Profiler() { return profiler_; }
9073

9174
bool NeedResetAllVars();
9275

@@ -101,7 +84,6 @@ class RPCServer {
10184
std::unordered_map<std::string, int> rpc_cond_map_;
10285
std::atomic<int> cur_cond_;
10386
std::condition_variable rpc_cond_;
104-
RPCServerProfiler profiler_;
10587

10688
protected:
10789
std::string bind_address_;

paddle/fluid/operators/distributed/variable_response.cc

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,9 @@
1616
#include <vector>
1717
#include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
1818

19+
DEFINE_string(rpc_server_profile_path, "./profile_ps",
20+
"the profile log file path");
21+
1922
namespace paddle {
2023
namespace operators {
2124
namespace distributed {

paddle/fluid/operators/distributed/variable_response.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,8 @@
2727
#include "paddle/fluid/framework/tensor.h"
2828
#include "paddle/fluid/operators/distributed/send_recv.pb.h"
2929

30+
DECLARE_string(rpc_server_profile_path);
31+
3032
namespace paddle {
3133
namespace operators {
3234
namespace distributed {

paddle/fluid/operators/listen_and_serv_op.cc

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -134,7 +134,6 @@ void ListenAndServOp::RunSyncLoop(
134134
rpc_service_->ResetBarrierCounter();
135135

136136
while (true) {
137-
rpc_service_->Profiler().OneStep();
138137
// Get from multiple trainers, we don't care about the order in which
139138
// the gradients arrives, just add suffix 0~n and merge the gradient.
140139
rpc_service_->SetCond(distributed::kRequestSend);

paddle/fluid/platform/profiler.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -226,7 +226,7 @@ RecordBlock::~RecordBlock() {
226226

227227
void EnableProfiler(ProfilerState state) {
228228
PADDLE_ENFORCE(state != ProfilerState::kDisabled,
229-
"Can't enbale profling, since the input state is ",
229+
"Can't enable profiling, since the input state is ",
230230
"ProfilerState::kDisabled");
231231

232232
std::lock_guard<std::mutex> l(profiler_mu);

python/paddle/fluid/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -118,7 +118,6 @@ def __bootstrap__():
118118
]
119119
if core.is_compiled_with_dist():
120120
read_env_flags.append('rpc_deadline')
121-
read_env_flags.append('rpc_server_profile_period')
122121
read_env_flags.append('rpc_server_profile_path')
123122
read_env_flags.append('enable_rpc_profiler')
124123
read_env_flags.append('rpc_send_thread_num')

0 commit comments

Comments
 (0)