Commit d2a7024

Refine profiler and expose to Python.

1 parent df9c13a · commit d2a7024

12 files changed: +171 -91 lines

cmake/external/pybind11.cmake
Lines changed: 1 addition & 1 deletion

@@ -26,7 +26,7 @@ ExternalProject_Add(
     extern_pybind
     ${EXTERNAL_PROJECT_LOG_ARGS}
     GIT_REPOSITORY  "https://github.com/pybind/pybind11.git"
-    GIT_TAG         "v2.1.1"
+    GIT_TAG         "v2.2.1"
     PREFIX          ${PYBIND_SOURCE_DIR}
     UPDATE_COMMAND  ""
     CONFIGURE_COMMAND ""

paddle/framework/CMakeLists.txt
Lines changed: 2 additions & 1 deletion

@@ -68,7 +68,8 @@ cc_library(backward SRCS backward.cc DEPS net_op)
 cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context fill_constant_op)
 cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
 
-cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward glog lod_rank_table)
+cc_library(executor SRCS executor.cc DEPS op_registry device_context scope
+           framework_proto backward glog lod_rank_table profiler)
 
 cc_library(prune SRCS prune.cc DEPS framework_proto)
 cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)

paddle/framework/executor.cc
Lines changed: 6 additions & 0 deletions

@@ -22,6 +22,7 @@ limitations under the License. */
 #include "paddle/framework/lod_tensor_array.h"
 #include "paddle/framework/op_registry.h"
 #include "paddle/platform/place.h"
+#include "paddle/platform/profiler.h"
 
 DEFINE_bool(check_nan_inf, false,
             "Checking whether operator produce NAN/INF or not. It will be "
@@ -116,6 +117,11 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
   for (auto& op_desc : block.AllOps()) {
     auto op = paddle::framework::OpRegistry::CreateOp(*op_desc);
     VLOG(3) << op->DebugStringEx(local_scope);
+
+    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+    auto dev_ctx = const_cast<platform::DeviceContext*>(pool.Get(place_));
+    platform::RecordEvent record_event(op->Type(), dev_ctx);
+
     op->Run(*local_scope, place_);
     if (FLAGS_check_nan_inf) {
       for (auto& vname : op->OutputVars(true)) {

paddle/platform/profiler.cc
Lines changed: 26 additions & 11 deletions

@@ -163,21 +163,36 @@ void EnableProfiler(ProfilerState state) {
   Mark("_start_profiler_", nullptr);
 }
 
-std::vector<std::vector<Event>> DisableProfiler() {
-  PADDLE_ENFORCE(g_state != ProfilerState::kDisabled,
-                 "Can't disable profiling, since it's not starting.");
-  // Mark the profiling stop.
-  Mark("_stop_profiler_", nullptr);
-  g_state = ProfilerState::kDisabled;
-  std::vector<std::vector<Event>> result;
+void ResetProfiler() {
   std::lock_guard<std::mutex> guard(g_all_event_lists_mutex);
+  for (auto it = g_all_event_lists.begin(); it != g_all_event_lists.end();
+       ++it) {
+    (*it)->Clear();
+  }
+}
+
+std::vector<std::vector<Event>> GetAllEvents() {
+  std::lock_guard<std::mutex> guard(g_all_event_lists_mutex);
+  std::vector<std::vector<Event>> result;
   for (auto it = g_all_event_lists.begin(); it != g_all_event_lists.end();
        ++it) {
     result.emplace_back((*it)->Reduce());
   }
   return result;
 }
 
+void DisableProfiler(EventSortingKey sorted_key) {
+  PADDLE_ENFORCE(g_state != ProfilerState::kDisabled,
+                 "Can't disable profiling, since it's not starting.");
+  // Mark the profiling stop.
+  Mark("_stop_profiler_", nullptr);
+  g_state = ProfilerState::kDisabled;
+
+  std::vector<std::vector<Event>> all_events = GetAllEvents();
+  ParseEvents(all_events, sorted_key);
+  ResetProfiler();
+}
+
 void ParseEvents(std::vector<std::vector<Event>>& events,
                  EventSortingKey sorted_by) {
   if (g_profiler_place == "") return;
@@ -291,12 +306,12 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
   }
 
   // Print report
-  PrintProfilingReport(events_table, sorted_domain, max_name_width + 4, 12);
+  PrintProfiler(events_table, sorted_domain, max_name_width + 4, 12);
 }
 
-void PrintProfilingReport(std::vector<std::vector<EventItem>>& events_table,
-                          std::string& sorted_domain, const size_t name_width,
-                          const size_t data_width) {
+void PrintProfiler(std::vector<std::vector<EventItem>>& events_table,
+                   std::string& sorted_domain, const size_t name_width,
+                   const size_t data_width) {
   // Output header information
   std::cout << "\n------------------------->"
             << " Profiling Report "

paddle/platform/profiler.h
Lines changed: 15 additions & 7 deletions

@@ -84,6 +84,8 @@ struct EventList {
     return result;
   }
 
+  void Clear() { event_blocks.clear(); }
+
   std::forward_list<std::vector<Event>> event_blocks;
 };
 
@@ -110,12 +112,9 @@
   std::string name_;
 };
 
-// Enable the profiling function.
-void EnableProfiler(ProfilerState state);
-
 // Return the event list of all threads. Asummed the returned value calls
 // event_lists, event_lists[i][j] represents the j-th Event of i-th thread.
-std::vector<std::vector<Event>> DisableProfiler();
+std::vector<std::vector<Event>> GetAllEvents();
 
 // The information of each event given in the profiling report
 struct EventItem {
@@ -130,13 +129,22 @@ struct EventItem {
 // Candidate keys to sort the profiling report
 enum EventSortingKey { kDefault, kCalls, kTotal, kMin, kMax, kAve };
 
+// Enable the profiling function.
+void EnableProfiler(ProfilerState state);
+
+// Clear the g_all_event_lists, which is total event lists of all threads.
+void ResetProfiler();
+
+void DisableProfiler(EventSortingKey sorted_key);
+
 // Parse the event list and output the profiling report
 void ParseEvents(std::vector<std::vector<Event>>&,
                  EventSortingKey sorted_by = EventSortingKey::kDefault);
 
 // Print results
-void PrintProfilingReport(std::vector<std::vector<EventItem>>& events_table,
-                          std::string& sorted_domain, const size_t name_width,
-                          const size_t data_width);
+void PrintProfiler(std::vector<std::vector<EventItem>>& events_table,
+                   std::string& sorted_domain, const size_t name_width,
+                   const size_t data_width);
+
 }  // namespace platform
 }  // namespace paddle

paddle/platform/profiler_test.cc
Lines changed: 4 additions & 6 deletions

@@ -103,18 +103,14 @@ TEST(RecordEvent, RecordEvent) {
   // Bad Usage:
   PushEvent("event_without_pop", dev_ctx);
   PopEvent("event_without_push", dev_ctx);
-  std::vector<std::vector<Event>> events = paddle::platform::DisableProfiler();
-  // Will remove parsing-related code from test later
-  ParseEvents(events, EventSortingKey::kTotal);
+  std::vector<std::vector<Event>> events = paddle::platform::GetAllEvents();
 
   int cuda_startup_count = 0;
   int start_profiler_count = 0;
-  int stop_profiler_count = 0;
   for (size_t i = 0; i < events.size(); ++i) {
     for (size_t j = 0; j < events[i].size(); ++j) {
       if (events[i][j].name() == "_cuda_startup_") ++cuda_startup_count;
       if (events[i][j].name() == "_start_profiler_") ++start_profiler_count;
-      if (events[i][j].name() == "_stop_profiler_") ++stop_profiler_count;
       if (events[i][j].name() == "push") {
         EXPECT_EQ(events[i][j + 1].name(), "pop");
 #ifdef PADDLE_WITH_CUDA
@@ -127,5 +123,7 @@
   }
   EXPECT_EQ(cuda_startup_count % 5, 0);
   EXPECT_EQ(start_profiler_count, 1);
-  EXPECT_EQ(stop_profiler_count, 1);
+
+  // Will remove parsing-related code from test later
+  DisableProfiler(EventSortingKey::kTotal);
 }

paddle/pybind/CMakeLists.txt
Lines changed: 1 addition & 1 deletion

@@ -1,7 +1,7 @@
 if(WITH_PYTHON)
   cc_library(paddle_pybind SHARED
     SRCS pybind.cc exception.cc protobuf.cc const_value.cc
-    DEPS pybind python backward proto_desc paddle_memory executor prune init
+    DEPS pybind python backward proto_desc paddle_memory executor prune init profiler
     ${GLOB_OP_LIB})
   if(NOT APPLE AND NOT ANDROID)
     target_link_libraries(paddle_pybind rt)

paddle/pybind/protobuf.cc
Lines changed: 10 additions & 60 deletions

@@ -21,74 +21,24 @@ limitations under the License. */
 #include "paddle/framework/program_desc.h"
 #include "paddle/framework/var_desc.h"
 
-// Cast boost::variant for PyBind.
-// Copy from
-// https://github.com/pybind/pybind11/issues/576#issuecomment-269563199
+using boost::variant;
+
 namespace pybind11 {
 namespace detail {
 
-// Can be replaced by a generic lambda in C++14
-struct variant_caster_visitor : public boost::static_visitor<handle> {
-  return_value_policy policy;
-  handle parent;
-
-  variant_caster_visitor(return_value_policy policy, handle parent)
-      : policy(policy), parent(parent) {}
-
-  template <class T>
-  handle operator()(T const &src) const {
-    return make_caster<T>::cast(src, policy, parent);
-  }
-};
-
-template <class Variant>
-struct variant_caster;
-
-template <template <class...> class V, class... Ts>
-struct variant_caster<V<Ts...>> {
-  using Type = V<Ts...>;
-
-  template <typename T>
-  typename std::enable_if<
-      !std::is_same<T, boost::detail::variant::void_>::value, bool>::type
-  try_load(handle src, bool convert) {
-    auto caster = make_caster<T>();
-    if (!load_success_ && caster.load(src, convert)) {
-      load_success_ = true;
-      value = cast_op<T>(caster);
-      return true;
-    }
-    return false;
-  }
-
-  template <typename T>
-  typename std::enable_if<std::is_same<T, boost::detail::variant::void_>::value,
-                          bool>::type
-  try_load(handle src, bool convert) {
-    return false;
-  }
-
-  bool load(handle src, bool convert) {
-    auto unused = {false, try_load<Ts>(src, convert)...};
-    (void)(unused);
-    return load_success_;
-  }
-
-  static handle cast(Type const &src, return_value_policy policy,
-                     handle parent) {
-    variant_caster_visitor visitor(policy, parent);
-    return boost::apply_visitor(visitor, src);
-  }
-
-  PYBIND11_TYPE_CASTER(Type, _("Variant"));
-  bool load_success_{false};
-};
-
 // Add specialization for concrete variant type
 template <class... Args>
 struct type_caster<boost::variant<Args...>>
     : variant_caster<boost::variant<Args...>> {};
 
+template <>
+struct visit_helper<boost::variant> {
+  template <typename... Args>
+  static auto call(Args &&... args) -> decltype(boost::apply_visitor(args...)) {
+    return boost::apply_visitor(args...);
+  }
+};
+
 }  // namespace detail
 }  // namespace pybind11
 

paddle/pybind/protobuf.h
Lines changed: 1 addition & 0 deletions

@@ -17,6 +17,7 @@ limitations under the License. */
 #include <Python.h>
 #include <fstream>
 #include <vector>
+#include "paddle/platform/variant.h"
 #include "pybind11/numpy.h"
 #include "pybind11/pybind11.h"
 #include "pybind11/stl.h"

paddle/pybind/pybind.cc
Lines changed: 24 additions & 3 deletions

@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/pybind/protobuf.h"
+#include "pybind11/iostream.h"
 
 #include <mutex>  // for call_once
 #include <unordered_map>
@@ -30,6 +31,7 @@ limitations under the License. */
 #include "paddle/operators/net_op.h"
 #include "paddle/platform/enforce.h"
 #include "paddle/platform/place.h"
+#include "paddle/platform/profiler.h"
 #include "paddle/pybind/const_value.h"
 #include "paddle/pybind/exception.h"
 #include "paddle/pybind/pybind.h"
@@ -60,8 +62,8 @@ bool IsCompileGPU() {
 #endif
 }
 
-PYBIND11_PLUGIN(core) {
-  py::module m("core", "C++ core of PaddlePaddle");
+PYBIND11_MODULE(core, m) {
+  m.doc() = "C++ core of PaddlePaddle";
 
   // using framework in this function. Since it is inside a function, it will
   // not cause namespace pollution.
@@ -481,7 +483,26 @@ All parameter, weight, gradient are variables in Paddle.
   m.def("nvprof_stop", platform::CudaProfilerStop);
 #endif
 
-  return m.ptr();
+  py::enum_<platform::ProfilerState>(m, "ProfilerState", py::arithmetic())
+      .value("kDisabled", platform::ProfilerState::kDisabled)
+      .value("kCPU", platform::ProfilerState::kCPU)
+      .value("kCUDA", platform::ProfilerState::kCUDA)
+      .export_values();
+
+  py::enum_<platform::EventSortingKey>(m, "EventSortingKey", py::arithmetic())
+      .value("kDefault", platform::EventSortingKey::kDefault)
+      .value("kCalls", platform::EventSortingKey::kCalls)
+      .value("kTotal", platform::EventSortingKey::kTotal)
+      .value("kMin", platform::EventSortingKey::kMin)
+      .value("kMax", platform::EventSortingKey::kMax)
+      .value("kAve", platform::EventSortingKey::kAve)
+      .export_values();
+
+  m.def("enable_profiler", platform::EnableProfiler);
+  m.def("disable_profiler", platform::DisableProfiler);
+  m.def("reset_profiler", platform::ResetProfiler);
+
+  py::add_ostream_redirect(m, "ostream_redirect");
 }
 }  // namespace pybind
 }  // namespace paddle
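
For context, here is a minimal sketch of how the bindings added above might be driven from the Python side once this commit lands. The import path and the "warm-up vs. measured" structure are assumptions for illustration only; they are not defined anywhere in this diff.

    # Hypothetical usage of the newly exposed profiler bindings.
    # The module path `paddle.v2.fluid.core` is an assumption about the build layout.
    import paddle.v2.fluid.core as core

    core.enable_profiler(core.ProfilerState.kCPU)       # start recording CPU events
    # ... run a few warm-up iterations of the program being profiled ...
    core.reset_profiler()                                # drop warm-up events, keep profiling on
    # ... run the iterations that should actually appear in the report ...
    # Stops profiling, prints the report sorted by total time, then clears all events.
    core.disable_profiler(core.EventSortingKey.kTotal)

Because `DisableProfiler` now parses and resets internally, the Python caller gets the printed report without touching `GetAllEvents` or `ParseEvents` directly.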
