Skip to content

Commit 3861269

Browse files
committed
merge develop branch
2 parents 8329a1f + fa2ab33 commit 3861269

File tree

88 files changed

+2644
-958
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

88 files changed

+2644
-958
lines changed

cmake/inference_lib.cmake

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ function(copy TARGET)
1818
set(oneValueArgs "")
1919
set(multiValueArgs SRCS DSTS DEPS)
2020
cmake_parse_arguments(copy_lib "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
21-
set(inference_lib_dist_dep ${TARGET} ${inference_lib_dist_dep} PARENT_SCOPE)
21+
set(fluid_lib_dist_dep ${TARGET} ${fluid_lib_dist_dep} PARENT_SCOPE)
2222

2323
list(LENGTH copy_lib_SRCS copy_lib_SRCS_len)
2424
list(LENGTH copy_lib_DSTS copy_lib_DSTS_len)
@@ -185,7 +185,8 @@ copy(cmake_cache
185185
SRCS ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt
186186
DSTS ${FLUID_INSTALL_DIR})
187187

188-
add_custom_target(inference_lib_dist DEPENDS ${inference_lib_dist_dep})
188+
# This command generates a complete fluid library for both train and inference
189+
add_custom_target(fluid_lib_dist DEPENDS ${fluid_lib_dist_dep})
189190

190191
# paddle fluid version
191192
execute_process(

paddle/fluid/API.spec

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ paddle.fluid.layers.conv2d_transpose ArgSpec(args=['input', 'num_filters', 'outp
7575
paddle.fluid.layers.conv3d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
7676
paddle.fluid.layers.sequence_expand ArgSpec(args=['x', 'y', 'ref_level', 'name'], varargs=None, keywords=None, defaults=(-1, None))
7777
paddle.fluid.layers.sequence_expand_as ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,))
78-
paddle.fluid.layers.sequence_pad ArgSpec(args=['x', 'pad_value', 'maxlen'], varargs=None, keywords=None, defaults=(None,))
78+
paddle.fluid.layers.sequence_pad ArgSpec(args=['x', 'pad_value', 'maxlen', 'name'], varargs=None, keywords=None, defaults=(None, None))
79+
paddle.fluid.layers.sequence_unpad ArgSpec(args=['x', 'length', 'name'], varargs=None, keywords=None, defaults=(None,))
7980
paddle.fluid.layers.lstm_unit ArgSpec(args=['x_t', 'hidden_t_prev', 'cell_t_prev', 'forget_bias', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(0.0, None, None, None))
8081
paddle.fluid.layers.reduce_sum ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
8182
paddle.fluid.layers.reduce_mean ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
@@ -127,6 +128,7 @@ paddle.fluid.layers.relu ArgSpec(args=['x', 'name'], varargs=None, keywords=None
127128
paddle.fluid.layers.log ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
128129
paddle.fluid.layers.crop ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
129130
paddle.fluid.layers.rank_loss ArgSpec(args=['label', 'left', 'right', 'name'], varargs=None, keywords=None, defaults=(None,))
131+
paddle.fluid.layers.margin_rank_loss ArgSpec(args=['label', 'left', 'right', 'margin', 'name'], varargs=None, keywords=None, defaults=(0.1, None))
130132
paddle.fluid.layers.elu ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(1.0, None))
131133
paddle.fluid.layers.relu6 ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(6.0, None))
132134
paddle.fluid.layers.pow ArgSpec(args=['x', 'factor', 'name'], varargs=None, keywords=None, defaults=(1.0, None))

paddle/fluid/CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,5 @@ endif(NOT WIN32)
1212
if(WITH_INFERENCE)
1313
# NOTE: please add subdirectory inference at last.
1414
add_subdirectory(inference)
15+
add_subdirectory(train)
1516
endif()
16-
17-
add_subdirectory(train)

paddle/fluid/framework/details/op_handle_base.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ class OpHandleBase {
6464
virtual bool IsMultiDeviceTransfer() { return false; }
6565

6666
const platform::DeviceContext *DeviceContext(platform::Place place) {
67-
return dev_ctxes_[place];
67+
auto it = dev_ctxes_.find(place);
68+
return it != dev_ctxes_.end() ? it->second : nullptr;
6869
}
6970

7071
void SetDeviceContext(platform::Place place, platform::DeviceContext *ctx_) {

paddle/fluid/framework/executor.cc

Lines changed: 43 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,41 @@ ExecutorPrepareContext::~ExecutorPrepareContext() {
4646
VLOG(5) << "destroy ExecutorPrepareContext";
4747
}
4848

49+
template <typename RefCntMap>
50+
static void DeleteUnusedTensors(const Scope& scope, const OperatorBase* op,
51+
GarbageCollector<Tensor>* gc,
52+
RefCntMap* ref_cnts) {
53+
std::unordered_set<Tensor*> erase_tensors;
54+
55+
auto handler = [&](const VariableNameMap& name_map) {
56+
for (auto& name_pair : name_map) {
57+
for (auto& name : name_pair.second) {
58+
auto it = ref_cnts->find(name);
59+
if (it == ref_cnts->end()) continue;
60+
if ((it->second)-- == 1) {
61+
auto* var = scope.FindVar(name);
62+
if (var != nullptr) {
63+
VLOG(10) << "Erase tensor \'" << name << "\'";
64+
if (var->IsType<LoDTensor>()) {
65+
erase_tensors.insert(var->GetMutable<LoDTensor>());
66+
} else if (var->IsType<SelectedRows>()) {
67+
erase_tensors.insert(
68+
var->GetMutable<SelectedRows>()->mutable_value());
69+
}
70+
}
71+
}
72+
}
73+
}
74+
};
75+
76+
handler(op->Inputs());
77+
handler(op->Outputs());
78+
79+
if (!erase_tensors.empty()) {
80+
gc->Add(erase_tensors);
81+
}
82+
}
83+
4984
Executor::Executor(const platform::Place& place) : place_(place) {}
5085

5186
void Executor::Close() {
@@ -331,9 +366,13 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
331366
}
332367

333368
int64_t max_memory_size = GetEagerDeletionThreshold();
334-
335369
std::unique_ptr<GarbageCollector<Tensor>> gc;
336-
if (max_memory_size >= 0) {
370+
// WhileOp would set keep_kids to false
371+
// WhileGradOp would need the scopes created in WhileOp
372+
// Perhaps, we should not perform eager deletion in WhileOp
373+
// The scopes and variables created by WhileOp would be deleted
374+
// in WhileGradOp.
375+
if (max_memory_size >= 0 && !keep_kids) {
337376
ctx->ResetReferenceCount();
338377
#ifdef PADDLE_WITH_CUDA
339378
if (platform::is_gpu_place(place_)) {
@@ -352,45 +391,8 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
352391
op->Run(*local_scope, place_);
353392

354393
if (gc != nullptr) {
355-
std::vector<std::string> erase_vars;
356-
for (auto& input : op->Inputs()) {
357-
for (auto& input_name : input.second) {
358-
auto it = ctx->cur_ref_cnts_.find(input_name);
359-
if (it == ctx->cur_ref_cnts_.end()) continue;
360-
if (it->second == 1) { // should delete it
361-
erase_vars.emplace_back(input_name);
362-
ctx->cur_ref_cnts_.erase(input_name);
363-
} else {
364-
--(it->second);
365-
}
366-
}
367-
}
368-
369-
for (auto& output : op->Outputs()) {
370-
for (auto& output_name : output.second) {
371-
auto it = ctx->cur_ref_cnts_.find(output_name);
372-
if (it == ctx->cur_ref_cnts_.end()) continue;
373-
if (it->second == 1) {
374-
erase_vars.emplace_back(output_name);
375-
ctx->cur_ref_cnts_.erase(output_name);
376-
} else {
377-
--(it->second);
378-
}
379-
}
380-
}
381-
382-
if (!erase_vars.empty()) {
383-
std::vector<framework::LoDTensor*> erase_tensors;
384-
for (auto& name : erase_vars) {
385-
auto* var = local_scope->FindVar(name);
386-
if (var == nullptr) continue;
387-
if (var->IsType<framework::LoDTensor>()) {
388-
auto* tensor = var->GetMutable<framework::LoDTensor>();
389-
erase_tensors.push_back(tensor);
390-
}
391-
}
392-
if (!erase_tensors.empty()) gc->Add(erase_tensors);
393-
}
394+
DeleteUnusedTensors(*local_scope, op.get(), gc.get(),
395+
&(ctx->cur_ref_cnts_));
394396
}
395397

396398
if (FLAGS_benchmark) {

paddle/fluid/framework/executor.h

Lines changed: 19 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -32,38 +32,32 @@ template <typename T>
3232
std::unordered_map<std::string, T> GetNonPersistableReferenceCount(
3333
const ProgramDesc& prog, size_t block_id) {
3434
auto& block = prog.Block(block_id);
35-
std::unordered_set<std::string> ignored_vars;
3635
std::unordered_map<std::string, T> ref_cnts;
3736

38-
for (auto var_desc : block.AllVars()) {
39-
auto type = var_desc->Proto()->type().type();
40-
if (type != proto::VarType::LOD_TENSOR || var_desc->Persistable()) {
41-
ignored_vars.insert(var_desc->Name()); // ignore persistable vars
42-
}
43-
}
44-
45-
for (auto op_desc : block.AllOps()) {
46-
for (auto& input : op_desc->Inputs()) {
47-
for (auto& input_name : input.second) {
48-
if (!ignored_vars.count(input_name)) {
49-
if (ref_cnts.count(input_name))
50-
++ref_cnts[input_name];
51-
else
52-
ref_cnts[input_name] = 1;
37+
auto update_ref_cnts = [&](OpDesc* op_desc, const VariableNameMap& name_map) {
38+
for (auto& name_pair : name_map) {
39+
for (auto& name : name_pair.second) {
40+
auto* var_desc = block.FindVar(name);
41+
if (var_desc == nullptr || var_desc->Persistable()) continue;
42+
auto type = var_desc->Proto()->type().type();
43+
if (type != proto::VarType::LOD_TENSOR &&
44+
type != proto::VarType::SELECTED_ROWS) {
45+
continue;
5346
}
54-
}
55-
}
5647

57-
for (auto& output : op_desc->Outputs()) {
58-
for (auto output_name : output.second) {
59-
if (!ignored_vars.count(output_name)) {
60-
if (ref_cnts.count(output_name))
61-
++ref_cnts[output_name];
62-
else
63-
ref_cnts[output_name] = 1;
48+
auto it = ref_cnts.find(name);
49+
if (it != ref_cnts.end()) {
50+
++it->second;
51+
} else {
52+
ref_cnts[name] = 1;
6453
}
6554
}
6655
}
56+
};
57+
58+
for (auto op_desc : block.AllOps()) {
59+
update_ref_cnts(op_desc, op_desc->Inputs());
60+
update_ref_cnts(op_desc, op_desc->Outputs());
6761
}
6862
return ref_cnts;
6963
}

paddle/fluid/framework/ir/CMakeLists.txt

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ pass_library(graph_to_program_pass base)
3030
pass_library(graph_viz_pass base)
3131
pass_library(fc_fuse_pass inference)
3232
if (WITH_MKLDNN)
33-
pass_library(conv_bias_mkldnn_fuse_pass inference)
3433
pass_library(conv_relu_mkldnn_fuse_pass inference)
3534
endif ()
3635
pass_library(attention_lstm_fuse_pass inference)
@@ -53,7 +52,6 @@ cc_test(graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_r
5352
cc_test(graph_to_program_pass_test SRCS graph_to_program_pass_test.cc DEPS graph_to_program_pass)
5453
cc_test(test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS graph_pattern_detector)
5554
cc_test(test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto)
56-
if(WITH_MKLDNN)
57-
cc_test(test_conv_bias_mkldnn_fuse_pass SRCS conv_bias_mkldnn_fuse_pass_tester.cc DEPS conv_bias_mkldnn_fuse_pass)
55+
if (WITH_MKLDNN)
5856
cc_test(test_conv_relu_mkldnn_fuse_pass SRCS conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass)
59-
endif()
57+
endif ()

paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.cc

Lines changed: 0 additions & 78 deletions
This file was deleted.

0 commit comments

Comments
 (0)