Skip to content

Commit 8e4ad00

Browse files
authored
Merge pull request #16198 from velconia/imperative_train_speed
Improve imperative mode training speed
2 parents 2579ade + 3622537 commit 8e4ad00

File tree

16 files changed

+170
-57
lines changed

16 files changed

+170
-57
lines changed

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: "
2424
"${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
2525
message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: "
2626
"${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
27+
message(STATUS "AR tools: ${CMAKE_AR}")
28+
2729
if(WIN32)
2830
set(CMAKE_SUPPRESS_REGENERATION ON)
2931
set(CMAKE_STATIC_LIBRARY_PREFIX lib)

paddle/fluid/framework/grad_op_desc_maker.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@ limitations under the License. */
1414

1515
#pragma once
1616
#include <algorithm>
17+
#include <memory>
1718
#include <string>
19+
#include <unordered_map>
1820
#include <unordered_set>
1921
#include <vector>
2022
#include "paddle/fluid/framework/op_desc.h"
@@ -55,11 +57,11 @@ class GradOpDescMakerBase {
5557
std::back_inserter(ret_val),
5658
[this](const std::string& fwd_var_name) -> std::string {
5759
auto g_name = GradVarName(fwd_var_name);
58-
if (no_grad_set_.count(g_name)) {
59-
return kEmptyVarName;
60-
} else {
60+
if (no_grad_set_.empty() || !no_grad_set_.count(g_name)) {
6161
(*this->grad_to_var_)[g_name] = fwd_var_name;
6262
return g_name;
63+
} else {
64+
return kEmptyVarName;
6365
}
6466
});
6567
if (!drop_empty_grad) {

paddle/fluid/imperative/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ if(WITH_PYTHON)
22
cc_library(layer SRCS layer.cc DEPS proto_desc operator device_context blas pybind)
33
cc_library(tracer SRCS tracer.cc DEPS proto_desc device_context pybind)
44
cc_library(engine SRCS engine.cc)
5+
cc_library(imperative_profiler SRCS profiler.cc)
56
endif()

paddle/fluid/imperative/layer.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
239239
VLOG(3) << "apply grad op " << grad_op_desc->Type();
240240

241241
// Allocate tmp grad output variable
242-
for (auto it : grad_output_variable_map) {
242+
for (const auto& it : grad_output_variable_map) {
243243
auto& outputs = tmp_grad_outputs[k][it.first];
244244
outputs.reserve(it.second.size());
245245
for (size_t i = 0; i < it.second.size(); ++i) {
@@ -273,9 +273,9 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
273273

274274
// Add tmp grad outputs to original grad vars
275275
for (size_t k = 0; k < grad_output_vars_.size(); ++k) {
276-
for (auto it : grad_output_vars_[k]) {
276+
for (const auto& it : grad_output_vars_[k]) {
277277
auto& outputs = tmp_grad_outputs[k][it.first];
278-
auto& origin_outputs = it.second;
278+
const auto& origin_outputs = it.second;
279279
PADDLE_ENFORCE_EQ(outputs.size(), origin_outputs.size());
280280

281281
for (size_t i = 0; i < outputs.size(); ++i) {

paddle/fluid/imperative/layer.h

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -294,17 +294,23 @@ class PYBIND11_HIDDEN OpBase {
294294

295295
void InvokeBackwardHooks();
296296

297-
void TrackPreOp(const VarBase* inp_var, const std::string& inp_name) {
298-
if (inp_var->PreOp() && !inp_var->IsStopGradient()) {
299-
VLOG(3) << "add pre op " << inp_var->PreOp()->Type() << " in slot "
300-
<< inp_name;
301-
pre_ops_[inp_name].push_back(inp_var->PreOp());
302-
pre_ops_out_idx_[inp_name].push_back(inp_var->PreOpOutIdx());
303-
} else {
304-
VLOG(3) << "no pre op in slot " << inp_name
305-
<< " input var stop_gradient: " << inp_var->IsStopGradient();
306-
pre_ops_[inp_name].push_back(nullptr);
307-
// pre_ops_out_idx_[inp_name].push_back(-1);
297+
void TrackPreOp(const std::string& inp_name,
298+
const std::vector<VarBase*>& inputs) {
299+
auto& pre_ops_list = pre_ops_[inp_name];
300+
pre_ops_list.reserve(inputs.size());
301+
auto& pre_ops_out_idx_list = pre_ops_out_idx_[inp_name];
302+
for (VarBase* inp_var : inputs) {
303+
if (inp_var->PreOp() && !inp_var->IsStopGradient()) {
304+
VLOG(3) << "add pre op " << inp_var->PreOp()->Type() << " in slot "
305+
<< inp_name;
306+
pre_ops_list.emplace_back(inp_var->PreOp());
307+
pre_ops_out_idx_list.push_back(inp_var->PreOpOutIdx());
308+
} else {
309+
VLOG(3) << "no pre op in slot " << inp_name
310+
<< " input var stop_gradient: " << inp_var->IsStopGradient();
311+
pre_ops_list.emplace_back(nullptr);
312+
// pre_ops_out_idx_list.push_back(-1);
313+
}
308314
}
309315
}
310316

paddle/fluid/imperative/profiler.cc

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "paddle/fluid/imperative/profiler.h"
16+
17+
#ifdef WITH_GPERFTOOLS
18+
#include "gperftools/profiler.h"
19+
#endif
20+
#include <gflags/gflags.h>
21+
#include <glog/logging.h>
22+
#include <mutex> // NOLINT
23+
#include <thread> // NOLINT
24+
25+
// Path of the gperftools CPU profile that StartProfile() passes to
// ProfilerStart(). StartProfile() is a no-op when this flag is empty.
// NOTE(review): the help text says an empty value disables profiling, yet the
// default is the non-empty "xxgperf" -- confirm that this default is intended.
DEFINE_string(
    tracer_profile_fname, "xxgperf",
    "Profiler filename for imperative tracer, which is generated by "
    "gperftools. Only valid when compiled `WITH_PROFILER=ON`. Empty to "
    "disable.");
29+
30+
namespace paddle {
31+
namespace imperative {
32+
33+
static std::once_flag gTracerProfileOnce;
34+
#ifdef WITH_GPERFTOOLS
35+
static bool gTracerProfilerStarted = false;
36+
#endif
37+
38+
void StartProfile() {
39+
if (!FLAGS_tracer_profile_fname.empty()) {
40+
std::call_once(gTracerProfileOnce, [] {
41+
#ifdef WITH_GPERFTOOLS
42+
ProfilerStart(FLAGS_tracer_profile_fname.c_str());
43+
gTracerProfilerStarted = true;
44+
#else
45+
LOG(WARNING) << "Paddle is not compiled with gperftools. "
46+
"FLAGS_tracer_profile_fname will be ignored";
47+
#endif
48+
});
49+
}
50+
}
51+
52+
// Flushes the profile data collected since StartProfile().
//
// Despite the name, this only calls ProfilerFlush(); it does not call
// ProfilerStop(). When built without gperftools it logs a warning instead.
void StopProfile() {
#ifdef WITH_GPERFTOOLS
  // Flush only after StartProfile() has actually started the profiler; this
  // is the same gTracerProfilerStarted guard the flush had when it lived in
  // Tracer::Trace().
  if (gTracerProfilerStarted) {
    ProfilerFlush();
  }
#else
  LOG(WARNING) << "Paddle is not compiled with gperftools. "
                  "FLAGS_tracer_profile_fname will be ignored";
#endif
}
60+
61+
} // namespace imperative
62+
} // namespace paddle

paddle/fluid/imperative/profiler.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once
16+
17+
namespace paddle {
18+
namespace imperative {
19+
20+
// Starts the gperftools CPU profiler for the imperative tracer, writing to the
// file named by FLAGS_tracer_profile_fname. Does nothing when that flag is
// empty; logs a warning when Paddle is built without gperftools.
void StartProfile();
21+
22+
// Flushes the collected gperftools profile data (via ProfilerFlush()); logs a
// warning when Paddle is built without gperftools.
void StopProfile();
23+
24+
} // namespace imperative
25+
} // namespace paddle

paddle/fluid/imperative/tracer.cc

Lines changed: 3 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -23,23 +23,9 @@
2323
#include "paddle/fluid/platform/device_context.h"
2424
#include "paddle/fluid/platform/enforce.h"
2525

26-
#ifdef WITH_GPERFTOOLS
27-
#include "gperftools/profiler.h"
28-
#endif
29-
30-
DEFINE_string(
31-
tracer_profile_fname, "",
32-
"Profiler filename for imperative tracer, which generated by gperftools."
33-
"Only valid when compiled `WITH_PROFILER=ON`. Empty if disable.");
34-
3526
namespace paddle {
3627
namespace imperative {
3728

38-
static std::once_flag gTracerProfileOnce;
39-
#ifdef WITH_GPERFTOOLS
40-
static bool gTracerProfilerStarted = false;
41-
#endif
42-
4329
void CreateGradOp(const framework::OpDesc& op_desc,
4430
const std::unordered_set<std::string>& no_grad_set,
4531
const std::vector<framework::BlockDesc*>& grad_sub_block,
@@ -145,31 +131,13 @@ framework::VariableNameMap CreateOutputVarNameMap(
145131
return result;
146132
}
147133

148-
Tracer::Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {
149-
if (!FLAGS_tracer_profile_fname.empty()) {
150-
std::call_once(gTracerProfileOnce, [] {
151-
#ifdef WITH_GPERFTOOLS
152-
ProfilerStart(FLAGS_tracer_profile_fname.c_str());
153-
gTracerProfilerStarted = true;
154-
#else
155-
LOG(WARNING) << "Paddle is not compiled with gperftools. "
156-
"FLAGS_tracer_profile_fname will be ignored";
157-
#endif
158-
});
159-
}
160-
}
134+
// Constructs a Tracer that stores `root_block` in root_block_; the constructor
// body performs no other work.
Tracer::Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {}
161135

162136
std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
163137
const VarBasePtrMap& outputs,
164138
framework::AttributeMap attrs_map,
165139
const platform::Place expected_place,
166140
const bool stop_gradient) {
167-
#ifdef WITH_GPERFTOOLS
168-
if (gTracerProfilerStarted) {
169-
ProfilerFlush();
170-
}
171-
#endif
172-
173141
framework::VariableValueMap invars_map;
174142
framework::VariableValueMap outvars_map;
175143

@@ -184,14 +152,14 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
184152
inp->Name());
185153

186154
invars.emplace_back(inp->var_);
187-
op->TrackPreOp(inp, it.first);
188155
if (!stop_gradient) {
189156
current_vars_map[inp->Name()] = inp;
190157
}
191158
VLOG(3) << "input var name: " << inp->Name()
192159
<< " inited: " << inp->var_->IsInitialized()
193160
<< " stop_grad: " << inp->IsStopGradient();
194161
}
162+
op->TrackPreOp(it.first, it.second);
195163
}
196164

197165
op->output_vars_ = outputs;
@@ -319,9 +287,7 @@ std::vector<VarBase*> Tracer::PyTrace(OpBase* op,
319287
std::vector<framework::Variable*> ret_vars =
320288
PyLayer::Apply(op->forward_id_, inputs);
321289

322-
for (VarBase* inp : inputs) {
323-
op->TrackPreOp(inp, PyLayer::kFwdInp);
324-
}
290+
op->TrackPreOp(PyLayer::kFwdInp, inputs);
325291

326292
std::vector<VarBase*>& outputs = op->output_vars_[PyLayer::kFwdOut];
327293
outputs.reserve(ret_vars.size());

paddle/fluid/inference/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,5 +91,5 @@ if(WITH_TESTING)
9191
add_subdirectory(tests/book)
9292
if(WITH_INFERENCE_API_TEST)
9393
add_subdirectory(tests/api)
94-
endif()
94+
endif()
9595
endif()

paddle/fluid/pybind/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
set(PYBIND_DEPS pybind python proto_desc memory executor async_executor prune
22
feed_fetch_method pass_builder parallel_executor profiler layer scope_pool
3-
tracer analysis_predictor)
3+
tracer analysis_predictor imperative_profiler)
44

55
if(WITH_PYTHON)
66
list(APPEND PYBIND_DEPS py_func_op)

0 commit comments

Comments
 (0)