Skip to content

Commit cc7f551

Browse files
committed
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into optimize-pyreader
test=develop
2 parents 305d211 + 6447155 commit cc7f551

File tree

95 files changed

+4511
-913
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

95 files changed

+4511
-913
lines changed

cmake/inference_lib.cmake

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ function(copy TARGET)
1818
set(oneValueArgs "")
1919
set(multiValueArgs SRCS DSTS DEPS)
2020
cmake_parse_arguments(copy_lib "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
21-
set(inference_lib_dist_dep ${TARGET} ${inference_lib_dist_dep} PARENT_SCOPE)
21+
set(fluid_lib_dist_dep ${TARGET} ${fluid_lib_dist_dep} PARENT_SCOPE)
2222

2323
list(LENGTH copy_lib_SRCS copy_lib_SRCS_len)
2424
list(LENGTH copy_lib_DSTS copy_lib_DSTS_len)
@@ -185,7 +185,8 @@ copy(cmake_cache
185185
SRCS ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt
186186
DSTS ${FLUID_INSTALL_DIR})
187187

188-
add_custom_target(inference_lib_dist DEPENDS ${inference_lib_dist_dep})
188+
# This command generates a complete fluid library for both training and inference
189+
add_custom_target(fluid_lib_dist DEPENDS ${fluid_lib_dist_dep})
189190

190191
# paddle fluid version
191192
execute_process(

paddle/fluid/API.spec

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ paddle.fluid.layers.conv2d_transpose ArgSpec(args=['input', 'num_filters', 'outp
7575
paddle.fluid.layers.conv3d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
7676
paddle.fluid.layers.sequence_expand ArgSpec(args=['x', 'y', 'ref_level', 'name'], varargs=None, keywords=None, defaults=(-1, None))
7777
paddle.fluid.layers.sequence_expand_as ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,))
78-
paddle.fluid.layers.sequence_pad ArgSpec(args=['x', 'pad_value', 'maxlen'], varargs=None, keywords=None, defaults=(None,))
78+
paddle.fluid.layers.sequence_pad ArgSpec(args=['x', 'pad_value', 'maxlen', 'name'], varargs=None, keywords=None, defaults=(None, None))
79+
paddle.fluid.layers.sequence_unpad ArgSpec(args=['x', 'length', 'name'], varargs=None, keywords=None, defaults=(None,))
7980
paddle.fluid.layers.lstm_unit ArgSpec(args=['x_t', 'hidden_t_prev', 'cell_t_prev', 'forget_bias', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(0.0, None, None, None))
8081
paddle.fluid.layers.reduce_sum ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
8182
paddle.fluid.layers.reduce_mean ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
@@ -84,6 +85,7 @@ paddle.fluid.layers.reduce_min ArgSpec(args=['input', 'dim', 'keep_dim', 'name']
8485
paddle.fluid.layers.reduce_prod ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
8586
paddle.fluid.layers.sequence_first_step ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
8687
paddle.fluid.layers.sequence_last_step ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
88+
paddle.fluid.layers.sequence_slice ArgSpec(args=['input', 'offset', 'length', 'name'], varargs=None, keywords=None, defaults=(None,))
8789
paddle.fluid.layers.dropout ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed', 'name'], varargs=None, keywords=None, defaults=(False, None, None))
8890
paddle.fluid.layers.split ArgSpec(args=['input', 'num_or_sections', 'dim', 'name'], varargs=None, keywords=None, defaults=(-1, None))
8991
paddle.fluid.layers.ctc_greedy_decoder ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,))
@@ -127,6 +129,7 @@ paddle.fluid.layers.relu ArgSpec(args=['x', 'name'], varargs=None, keywords=None
127129
paddle.fluid.layers.log ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
128130
paddle.fluid.layers.crop ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
129131
paddle.fluid.layers.rank_loss ArgSpec(args=['label', 'left', 'right', 'name'], varargs=None, keywords=None, defaults=(None,))
132+
paddle.fluid.layers.margin_rank_loss ArgSpec(args=['label', 'left', 'right', 'margin', 'name'], varargs=None, keywords=None, defaults=(0.1, None))
130133
paddle.fluid.layers.elu ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(1.0, None))
131134
paddle.fluid.layers.relu6 ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(6.0, None))
132135
paddle.fluid.layers.pow ArgSpec(args=['x', 'factor', 'name'], varargs=None, keywords=None, defaults=(1.0, None))

paddle/fluid/framework/details/op_handle_base.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ class OpHandleBase {
6464
virtual bool IsMultiDeviceTransfer() { return false; }
6565

6666
const platform::DeviceContext *DeviceContext(platform::Place place) {
67-
return dev_ctxes_[place];
67+
auto it = dev_ctxes_.find(place);
68+
return it != dev_ctxes_.end() ? it->second : nullptr;
6869
}
6970

7071
void SetDeviceContext(platform::Place place, platform::DeviceContext *ctx_) {

paddle/fluid/framework/executor.cc

Lines changed: 44 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,41 @@ ExecutorPrepareContext::~ExecutorPrepareContext() {
4646
VLOG(5) << "destroy ExecutorPrepareContext";
4747
}
4848

49+
template <typename RefCntMap>
50+
static void DeleteUnusedTensors(const Scope& scope, const OperatorBase* op,
51+
GarbageCollector<Tensor>* gc,
52+
RefCntMap* ref_cnts) {
53+
std::unordered_set<Tensor*> erase_tensors;
54+
55+
auto handler = [&](const VariableNameMap& name_map) {
56+
for (auto& name_pair : name_map) {
57+
for (auto& name : name_pair.second) {
58+
auto it = ref_cnts->find(name);
59+
if (it == ref_cnts->end()) continue;
60+
if ((it->second)-- == 1) {
61+
auto* var = scope.FindVar(name);
62+
if (var != nullptr) {
63+
VLOG(10) << "Erase tensor \'" << name << "\'";
64+
if (var->IsType<LoDTensor>()) {
65+
erase_tensors.insert(var->GetMutable<LoDTensor>());
66+
} else if (var->IsType<SelectedRows>()) {
67+
erase_tensors.insert(
68+
var->GetMutable<SelectedRows>()->mutable_value());
69+
}
70+
}
71+
}
72+
}
73+
}
74+
};
75+
76+
handler(op->Inputs());
77+
handler(op->Outputs());
78+
79+
if (!erase_tensors.empty()) {
80+
gc->Add(erase_tensors);
81+
}
82+
}
83+
4984
Executor::Executor(const platform::Place& place) : place_(place) {}
5085

5186
void Executor::Close() {
@@ -66,7 +101,7 @@ void InitializeVariable(Variable* var, proto::VarType::Type var_type) {
66101
} else if (var_type == proto::VarType::FETCH_LIST) {
67102
var->GetMutable<FeedFetchList>();
68103
} else if (var_type == proto::VarType::STEP_SCOPES) {
69-
var->GetMutable<std::vector<framework::Scope>>();
104+
var->GetMutable<std::vector<framework::Scope*>>();
70105
} else if (var_type == proto::VarType::LOD_RANK_TABLE) {
71106
var->GetMutable<LoDRankTable>();
72107
} else if (var_type == proto::VarType::LOD_TENSOR_ARRAY) {
@@ -331,9 +366,13 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
331366
}
332367

333368
int64_t max_memory_size = GetEagerDeletionThreshold();
334-
335369
std::unique_ptr<GarbageCollector<Tensor>> gc;
336-
if (max_memory_size >= 0) {
370+
// WhileOp would set keep_kids to true
371+
// WhileGradOp would need the scopes created in WhileOp
372+
// Perhaps, we should not perform eager deletion in WhileOp
373+
// The scopes and variables created by WhileOp would be deleted
374+
// in WhileGradOp.
375+
if (max_memory_size >= 0 && !keep_kids) {
337376
ctx->ResetReferenceCount();
338377
#ifdef PADDLE_WITH_CUDA
339378
if (platform::is_gpu_place(place_)) {
@@ -352,45 +391,8 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
352391
op->Run(*local_scope, place_);
353392

354393
if (gc != nullptr) {
355-
std::vector<std::string> erase_vars;
356-
for (auto& input : op->Inputs()) {
357-
for (auto& input_name : input.second) {
358-
auto it = ctx->cur_ref_cnts_.find(input_name);
359-
if (it == ctx->cur_ref_cnts_.end()) continue;
360-
if (it->second == 1) { // should delete it
361-
erase_vars.emplace_back(input_name);
362-
ctx->cur_ref_cnts_.erase(input_name);
363-
} else {
364-
--(it->second);
365-
}
366-
}
367-
}
368-
369-
for (auto& output : op->Outputs()) {
370-
for (auto& output_name : output.second) {
371-
auto it = ctx->cur_ref_cnts_.find(output_name);
372-
if (it == ctx->cur_ref_cnts_.end()) continue;
373-
if (it->second == 1) {
374-
erase_vars.emplace_back(output_name);
375-
ctx->cur_ref_cnts_.erase(output_name);
376-
} else {
377-
--(it->second);
378-
}
379-
}
380-
}
381-
382-
if (!erase_vars.empty()) {
383-
std::vector<framework::LoDTensor*> erase_tensors;
384-
for (auto& name : erase_vars) {
385-
auto* var = local_scope->FindVar(name);
386-
if (var == nullptr) continue;
387-
if (var->IsType<framework::LoDTensor>()) {
388-
auto* tensor = var->GetMutable<framework::LoDTensor>();
389-
erase_tensors.push_back(tensor);
390-
}
391-
}
392-
if (!erase_tensors.empty()) gc->Add(erase_tensors);
393-
}
394+
DeleteUnusedTensors(*local_scope, op.get(), gc.get(),
395+
&(ctx->cur_ref_cnts_));
394396
}
395397

396398
if (FLAGS_benchmark) {

paddle/fluid/framework/executor.h

Lines changed: 19 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -32,38 +32,32 @@ template <typename T>
3232
std::unordered_map<std::string, T> GetNonPersistableReferenceCount(
3333
const ProgramDesc& prog, size_t block_id) {
3434
auto& block = prog.Block(block_id);
35-
std::unordered_set<std::string> ignored_vars;
3635
std::unordered_map<std::string, T> ref_cnts;
3736

38-
for (auto var_desc : block.AllVars()) {
39-
auto type = var_desc->Proto()->type().type();
40-
if (type != proto::VarType::LOD_TENSOR || var_desc->Persistable()) {
41-
ignored_vars.insert(var_desc->Name()); // ignore persistable vars
42-
}
43-
}
44-
45-
for (auto op_desc : block.AllOps()) {
46-
for (auto& input : op_desc->Inputs()) {
47-
for (auto& input_name : input.second) {
48-
if (!ignored_vars.count(input_name)) {
49-
if (ref_cnts.count(input_name))
50-
++ref_cnts[input_name];
51-
else
52-
ref_cnts[input_name] = 1;
37+
auto update_ref_cnts = [&](OpDesc* op_desc, const VariableNameMap& name_map) {
38+
for (auto& name_pair : name_map) {
39+
for (auto& name : name_pair.second) {
40+
auto* var_desc = block.FindVar(name);
41+
if (var_desc == nullptr || var_desc->Persistable()) continue;
42+
auto type = var_desc->Proto()->type().type();
43+
if (type != proto::VarType::LOD_TENSOR &&
44+
type != proto::VarType::SELECTED_ROWS) {
45+
continue;
5346
}
54-
}
55-
}
5647

57-
for (auto& output : op_desc->Outputs()) {
58-
for (auto output_name : output.second) {
59-
if (!ignored_vars.count(output_name)) {
60-
if (ref_cnts.count(output_name))
61-
++ref_cnts[output_name];
62-
else
63-
ref_cnts[output_name] = 1;
48+
auto it = ref_cnts.find(name);
49+
if (it != ref_cnts.end()) {
50+
++it->second;
51+
} else {
52+
ref_cnts[name] = 1;
6453
}
6554
}
6655
}
56+
};
57+
58+
for (auto op_desc : block.AllOps()) {
59+
update_ref_cnts(op_desc, op_desc->Inputs());
60+
update_ref_cnts(op_desc, op_desc->Outputs());
6761
}
6862
return ref_cnts;
6963
}

paddle/fluid/framework/feed_fetch_method.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,7 @@ void SetFeedVariable(Scope* scope, const LoDTensor& input,
2727
// be created.
2828
VLOG(3) << "SetFeedVariable name=" << var_name << " index=" << index;
2929
Variable* g_feed_value = scope->Var(var_name);
30-
auto& feed_inputs =
31-
*(g_feed_value->GetMutable<std::vector<paddle::framework::LoDTensor>>());
30+
auto& feed_inputs = *(g_feed_value->GetMutable<FeedFetchList>());
3231
if (index >= feed_inputs.size()) {
3332
feed_inputs.resize(index + 1);
3433
}

0 commit comments

Comments
 (0)