Skip to content

Commit 0358fd0

Browse files
committed
Refine profiler code.
1 parent 05a733b commit 0358fd0

File tree

5 files changed

+28
-26
lines changed

5 files changed

+28
-26
lines changed

paddle/framework/executor.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
120120
VLOG(3) << op->DebugStringEx(local_scope);
121121

122122
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
123-
auto dev_ctx = const_cast<platform::DeviceContext*>(pool.Get(place_));
124-
platform::RecordEvent record_event(op->Type(), dev_ctx);
123+
platform::RecordEvent record_event(op->Type(), pool.Get(place_));
125124

126125
op->Run(*local_scope, place_);
127126
if (FLAGS_do_memory_benchmark) {

paddle/platform/profiler.cc

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,16 +47,16 @@ inline uint64_t GetTimeInNsec() {
4747
}
4848

4949
Event::Event(EventKind kind, std::string name, uint32_t thread_id,
50-
DeviceContext* dev_ctx)
50+
const DeviceContext* dev_ctx)
5151
: kind_(kind), name_(name), thread_id_(thread_id), has_cuda_(false) {
5252
#ifdef PADDLE_WITH_CUDA
53-
auto* cuda_dev_ctx = static_cast<const CUDADeviceContext*>(dev_ctx);
54-
if (cuda_dev_ctx) {
53+
has_cuda_ = dev_ctx ? platform::is_gpu_place(dev_ctx->GetPlace()) : false;
54+
if (has_cuda_) {
55+
auto* cuda_dev_ctx = static_cast<const CUDADeviceContext*>(dev_ctx);
5556
PADDLE_ENFORCE(cudaGetDevice(&device_));
5657
PADDLE_ENFORCE(cudaEventCreate(&event_));
5758
auto stream = cuda_dev_ctx->stream();
5859
PADDLE_ENFORCE(cudaEventRecord(event_, stream));
59-
has_cuda_ = true;
6060
}
6161
#endif
6262
cpu_ns_ = GetTimeInNsec();
@@ -114,19 +114,20 @@ inline EventList& GetEventList() {
114114
return *g_event_list;
115115
}
116116

117-
void Mark(const std::string& name, DeviceContext* dev_ctx) {
117+
void Mark(const std::string& name, const DeviceContext* dev_ctx) {
118118
GetEventList().Record(EventKind::kMark, name, g_thread_id, dev_ctx);
119119
}
120120

121-
void PushEvent(const std::string& name, DeviceContext* dev_ctx) {
121+
void PushEvent(const std::string& name, const DeviceContext* dev_ctx) {
122122
GetEventList().Record(EventKind::kPushRange, name, g_thread_id, dev_ctx);
123123
}
124124

125-
void PopEvent(const std::string& name, DeviceContext* dev_ctx) {
125+
void PopEvent(const std::string& name, const DeviceContext* dev_ctx) {
126126
GetEventList().Record(EventKind::kPopRange, name, g_thread_id, dev_ctx);
127127
}
128128

129-
RecordEvent::RecordEvent(const std::string& name, DeviceContext* dev_ctx) {
129+
RecordEvent::RecordEvent(const std::string& name,
130+
const DeviceContext* dev_ctx) {
130131
if (g_state == ProfilerState::kDisabled) return;
131132
dev_ctx_ = dev_ctx;
132133
name_ = name;
@@ -155,6 +156,7 @@ void EnableProfiler(ProfilerState state) {
155156
DeviceContext* dev_ctx = new CUDADeviceContext(CUDAPlace(d));
156157
Mark("_cuda_startup_", dev_ctx);
157158
dev_ctx->Wait();
159+
delete dev_ctx;
158160
});
159161
}
160162
}

paddle/platform/profiler.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ class Event {
2929
// The DeviceContext is used to get the cuda stream.
3030
// If CPU profiling mode, can pass nullptr.
3131
Event(EventKind kind, std::string name, uint32_t thread_id,
32-
DeviceContext* dev_ctx);
32+
const DeviceContext* dev_ctx);
3333

3434
std::string kind() const;
3535
std::string name() const { return name_; }
@@ -95,19 +95,19 @@ enum ProfilerState {
9595
kCUDA, // GPU profiling state
9696
};
9797

98-
void Mark(const std::string& name, DeviceContext* dev_ctx);
98+
void Mark(const std::string& name, const DeviceContext* dev_ctx);
9999

100-
void PushEvent(const std::string& name, DeviceContext* dev_ctx);
100+
void PushEvent(const std::string& name, const DeviceContext* dev_ctx);
101101

102-
void PopEvent(const std::string& name, DeviceContext* dev_ctx);
102+
void PopEvent(const std::string& name, const DeviceContext* dev_ctx);
103103

104104
struct RecordEvent {
105-
explicit RecordEvent(const std::string& name, DeviceContext* dev_ctx);
105+
explicit RecordEvent(const std::string& name, const DeviceContext* dev_ctx);
106106

107107
~RecordEvent();
108108

109109
// The device context is used by Event to get the current cuda stream.
110-
DeviceContext* dev_ctx_;
110+
const DeviceContext* dev_ctx_;
111111
// Event name
112112
std::string name_;
113113
};

python/paddle/v2/fluid/profiler.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,10 +81,11 @@ def profiler(state, sorted_key=None):
8181
to add more records.
8282
8383
Args:
84-
state (string) : The profiling state, It should be 'CPU' or 'GPU'.
85-
Although users may define CPUPlace or CUDAPlace when using Fluid,
86-
the profiler doesn't get the state based on this Place. Since the
87-
implementation is an independent part from the Fluid.
84+
state (string) : The profiling state, which should be 'CPU' or 'GPU',
85+
telling the profiler to use CPU timer or GPU timer for profiling.
86+
Although users may have already specified the execution place
87+
(CPUPlace/CUDAPlace) in the begining, for flexibility the profiler
88+
would not inherit this place.
8889
sorted_key (string) : If None, the profiling results will be printed
8990
in the order of first end time of events. Otherwise, the profiling
9091
results will be sorted by this flag. This flag should be one

python/paddle/v2/fluid/tests/test_profiler.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ def test_nvprof(self):
4141
exe.run(fluid.default_main_program(), feed={'data': input})
4242
os.remove(output_file)
4343

44-
def profiler(self, state):
45-
if state == 'GPU' and core.is_compile_gpu():
44+
def net_profiler(self, state):
45+
if state == 'GPU' and not core.is_compile_gpu():
4646
return
4747
startup_program = fluid.Program()
4848
main_program = fluid.Program()
@@ -79,11 +79,11 @@ def profiler(self, state):
7979
acc = np.array(outs[1])
8080
pass_acc = accuracy.eval(exe)
8181

82-
def not_test_cpu_profiler(self):
83-
self.profiler('CPU')
82+
def test_cpu_profiler(self):
83+
self.net_profiler('CPU')
8484

85-
def not_test_cuda_profiler(self):
86-
self.profiler('GPU')
85+
def test_cuda_profiler(self):
86+
self.net_profiler('GPU')
8787

8888

8989
if __name__ == '__main__':

0 commit comments

Comments
 (0)