Skip to content

Commit ace512a

Browse files
author
Yibing Liu
authored
Merge pull request #8596 from kuke/profiler_multi_gpu
Fix the profiler's bug in multi-gpu mode
2 parents 62698d6 + c0876cf commit ace512a

File tree

6 files changed

+11
-18
lines changed

6 files changed

+11
-18
lines changed

paddle/fluid/framework/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@ cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
5656
cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
5757
cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context)
5858
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog
59-
shape_inference data_transform lod_tensor)
59+
shape_inference data_transform lod_tensor profiler)
6060
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry init)
6161
cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog)
6262

@@ -80,7 +80,7 @@ cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
8080
cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog)
8181

8282
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope
83-
framework_proto backward glog lod_rank_table profiler feed_fetch_method)
83+
framework_proto backward glog lod_rank_table feed_fetch_method)
8484

8585
cc_library(prune SRCS prune.cc DEPS framework_proto)
8686
cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)

paddle/fluid/framework/executor.cc

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,6 @@ limitations under the License. */
2525
#include "paddle/fluid/framework/op_registry.h"
2626
#include "paddle/fluid/framework/reader.h"
2727
#include "paddle/fluid/platform/place.h"
28-
#include "paddle/fluid/platform/profiler.h"
2928

3029
DECLARE_bool(benchmark);
3130
DEFINE_bool(check_nan_inf, false,
@@ -126,11 +125,6 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
126125
for (auto& op_desc : block.AllOps()) {
127126
auto op = paddle::framework::OpRegistry::CreateOp(*op_desc);
128127

129-
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
130-
// TODO(panyx0718): Need a program id to distinguish programs.
131-
platform::RecordEvent record_event(op->Type(), pool.Get(place_),
132-
op_desc->Block()->ID());
133-
134128
VLOG(3) << place_ << " " << op->DebugStringEx(local_scope);
135129
op->Run(*local_scope, place_);
136130

paddle/fluid/framework/operator.cc

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ limitations under the License. */
2121
#include "paddle/fluid/framework/operator.h"
2222
#include "paddle/fluid/framework/shape_inference.h"
2323
#include "paddle/fluid/framework/var_type.h"
24+
#include "paddle/fluid/platform/profiler.h"
2425

2526
DECLARE_bool(benchmark);
2627

@@ -497,7 +498,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
497498
this->InferShape(&infer_shape_ctx);
498499
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
499500
auto dev_ctx = pool.Get(place);
500-
501+
// profile
502+
platform::RecordEvent record_event(Type(), dev_ctx);
501503
// check if op[type] has kernel registered.
502504
auto& all_op_kernels = AllOpKernels();
503505
auto kernels_iter = all_op_kernels.find(type_);

paddle/fluid/platform/profiler.cc

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -132,21 +132,19 @@ void PopEvent(const std::string& name, const DeviceContext* dev_ctx) {
132132
GetEventList().Record(EventKind::kPopRange, name, g_thread_id, dev_ctx);
133133
}
134134

135-
RecordEvent::RecordEvent(const std::string& name, const DeviceContext* dev_ctx,
136-
int32_t block_id) {
135+
RecordEvent::RecordEvent(const std::string& name,
136+
const DeviceContext* dev_ctx) {
137137
if (g_state == ProfilerState::kDisabled) return;
138138
dev_ctx_ = dev_ctx;
139139
name_ = name;
140140
PushEvent(name_, dev_ctx_);
141-
142-
full_name_ = string::Sprintf("%s_b%d", name, block_id);
143141
// Maybe need the same push/pop behavior.
144-
SetCurAnnotation(full_name_.c_str());
142+
SetCurAnnotation(name_.c_str());
145143
}
146144

147145
RecordEvent::~RecordEvent() {
148-
ClearCurAnnotation();
149146
if (g_state == ProfilerState::kDisabled) return;
147+
ClearCurAnnotation();
150148
PopEvent(name_, dev_ctx_);
151149
}
152150

paddle/fluid/platform/profiler.h

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -104,8 +104,7 @@ void PushEvent(const std::string& name, const DeviceContext* dev_ctx);
104104
void PopEvent(const std::string& name, const DeviceContext* dev_ctx);
105105

106106
struct RecordEvent {
107-
RecordEvent(const std::string& name, const DeviceContext* dev_ctx,
108-
int32_t block_id);
107+
RecordEvent(const std::string& name, const DeviceContext* dev_ctx);
109108

110109
~RecordEvent();
111110

paddle/fluid/platform/profiler_test.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -95,7 +95,7 @@ TEST(RecordEvent, RecordEvent) {
9595
*/
9696
for (int i = 1; i < 5; ++i) {
9797
std::string name = "evs_op_" + std::to_string(i);
98-
RecordEvent record_event(name, dev_ctx, 0);
98+
RecordEvent record_event(name, dev_ctx);
9999
int counter = 1;
100100
while (counter != i * 1000) counter++;
101101
}

0 commit comments

Comments
 (0)