
Commit b31905c

Merge branch 'develop' into resnet50_ut
2 parents: 1dcd6ee + 7a5f3f7


43 files changed: +2369 −133 lines

cmake/external/anakin.cmake

Lines changed: 1 addition & 0 deletions

@@ -52,6 +52,7 @@ ExternalProject_Add(
     PREFIX ${ANAKIN_SOURCE_DIR}
     UPDATE_COMMAND ""
     CMAKE_ARGS ${CMAKE_ARGS_PREFIX}
+               -DUSE_LOGGER=YES
                -DUSE_X86_PLACE=YES
                -DBUILD_WITH_UNIT_TEST=NO
                -DPROTOBUF_ROOT=${THIRD_PARTY_PATH}/install/protobuf

paddle/fluid/API.spec

Lines changed: 6 additions & 2 deletions

@@ -21,7 +21,7 @@ paddle.fluid.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'en
 paddle.fluid.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
 paddle.fluid.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
 paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
-paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'], varargs=None, keywords=None, defaults=(None, False, 0))
+paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False))
 paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.DistributeTranspilerConfig.__init__
 paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None))
@@ -299,13 +299,17 @@ paddle.fluid.contrib.BeamSearchDecoder.read_array ArgSpec(args=['self', 'init',
 paddle.fluid.contrib.BeamSearchDecoder.update_array ArgSpec(args=['self', 'array', 'value'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.contrib.memory_usage ArgSpec(args=['program', 'batch_size'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.contrib.op_freq_statistic ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.contrib.QuantizeTranspiler.__init__ ArgSpec(args=['self', 'weight_bits', 'activation_bits', 'activation_quantize_type', 'weight_quantize_type', 'window_size'], varargs=None, keywords=None, defaults=(8, 8, 'abs_max', 'abs_max', 10000))
+paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.contrib.QuantizeTranspiler.freeze_program ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None))
+paddle.fluid.contrib.QuantizeTranspiler.training_transpile ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
 paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.transpiler.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
 paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
 paddle.fluid.transpiler.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
-paddle.fluid.transpiler.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'], varargs=None, keywords=None, defaults=(None, False, 0))
+paddle.fluid.transpiler.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False))
 paddle.fluid.transpiler.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.transpiler.HashName.__init__ ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.transpiler.HashName.dispatch ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None)

paddle/fluid/framework/CMakeLists.txt

Lines changed: 5 additions & 2 deletions

@@ -56,9 +56,9 @@ else()
   cc_test(mixed_vector_test SRCS mixed_vector_test.cc DEPS place memory device_context tensor)
 endif()
 if (NOT WIN32)
-  cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto recordio version)
+  cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto recordio version)
 else()
-  cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto version)
+  cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto version)
 endif (NOT WIN32)

 cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory)
@@ -141,12 +141,15 @@ cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)

 cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog)

+cc_library(naive_executor SRCS naive_executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass)
+
 if(WITH_DISTRIBUTE)
   cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method sendrecvop_grpc cares grpc++_unsecure grpc_unsecure gpr graph_to_program_pass)
   set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
   set_source_files_properties(executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
 else()
   cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass)
+  cc_test(test_naive_executor SRCS naive_executor_test.cc DEPS naive_executor op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass elementwise_add_op)
 endif()

 if (NOT WIN32)

paddle/fluid/framework/ir/CMakeLists.txt

Lines changed: 6 additions & 6 deletions

@@ -28,9 +28,9 @@ cc_library(graph_pattern_detector SRCS graph_pattern_detector.cc DEPS graph grap
 pass_library(graph_to_program_pass base)
 pass_library(graph_viz_pass base)
 pass_library(fc_fuse_pass inference)
-if(WITH_MKLDNN)
-  pass_library(conv_relu_mkldnn_fuse_pass inference)
-endif()
+if (WITH_MKLDNN)
+  pass_library(conv_relu_mkldnn_fuse_pass inference)
+endif ()
 pass_library(attention_lstm_fuse_pass inference)
 pass_library(infer_clean_graph_pass inference)
 pass_library(fc_lstm_fuse_pass inference)
@@ -49,6 +49,6 @@ cc_test(graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_r
 cc_test(graph_to_program_pass_test SRCS graph_to_program_pass_test.cc DEPS graph_to_program_pass)
 cc_test(test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS graph_pattern_detector)
 cc_test(test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto)
-if(WITH_MKLDNN)
-  cc_test(test_conv_relu_mkldnn_fuse_pass SRCS conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass)
-endif()
+if (WITH_MKLDNN)
+  cc_test(test_conv_relu_mkldnn_fuse_pass SRCS conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass)
+endif ()
paddle/fluid/framework/naive_executor.cc (new file)

Lines changed: 150 additions & 0 deletions

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/string/pretty_log.h"

namespace paddle {
namespace framework {

// This code could be shared with Executor.
static void InitializeVariable(Variable *var, proto::VarType::Type var_type) {
  if (var_type == proto::VarType::LOD_TENSOR) {
    var->GetMutable<LoDTensor>();
  } else if (var_type == proto::VarType::SELECTED_ROWS) {
    var->GetMutable<SelectedRows>();
  } else if (var_type == proto::VarType::FEED_MINIBATCH) {
    var->GetMutable<FeedFetchList>();
  } else if (var_type == proto::VarType::FETCH_LIST) {
    var->GetMutable<FeedFetchList>();
  } else if (var_type == proto::VarType::STEP_SCOPES) {
    var->GetMutable<std::vector<framework::Scope>>();
  } else if (var_type == proto::VarType::LOD_RANK_TABLE) {
    var->GetMutable<LoDRankTable>();
  } else if (var_type == proto::VarType::LOD_TENSOR_ARRAY) {
    var->GetMutable<LoDTensorArray>();
  } else if (var_type == proto::VarType::PLACE_LIST) {
    var->GetMutable<platform::PlaceList>();
  } else if (var_type == proto::VarType::READER) {
    var->GetMutable<ReaderHolder>();
  } else if (var_type == proto::VarType::CHANNEL) {
    var->GetMutable<ChannelHolder>();
  } else if (var_type == proto::VarType::RAW) {
    // GetMutable will be called in the operator.
  } else {
    PADDLE_THROW(
        "Variable type %d is not in "
        "[LOD_TENSOR, SELECTED_ROWS, FEED_MINIBATCH, FETCH_LIST, "
        "LOD_RANK_TABLE, PLACE_LIST, READER, CHANNEL, RAW]",
        var_type);
  }
}

void NaiveExecutor::Prepare(Scope *parent_scope,
                            const ProgramDesc &program_desc, int block_id,
                            bool with_feed_fetch_ops) {
  if (!parent_scope) {
    scope_ = new framework::Scope;
  } else {
    scope_ = &parent_scope->NewScope();
  }
  CreateVariables(program_desc, scope_, block_id);
  CreateOps(program_desc, block_id, with_feed_fetch_ops);
}

void NaiveExecutor::Run() {
  for (auto &op : ops_) {
    VLOG(4) << "run " << op->Type();
    op->Run(*scope_, place_);
  }
}

void NaiveExecutor::CreateVariables(const ProgramDesc &desc, Scope *scope,
                                    int block_id) {
  PADDLE_ENFORCE(scope);
  auto &global_block = desc.Block(block_id);

  const Scope *ancestor_scope = scope;
  while (ancestor_scope->parent()) {
    ancestor_scope = ancestor_scope->parent();
  }

  if (ancestor_scope != scope) {
    for (auto &var : global_block.AllVars()) {
      if (var->Name() == framework::kEmptyVarName) {
        continue;
      }
      // Create persistable vars in the ancestor scope.
      if (var->Persistable()) {
        auto *ptr = const_cast<Scope *>(ancestor_scope)->Var(var->Name());
        InitializeVariable(ptr, var->GetType());
        VLOG(3) << "Create Variable " << var->Name()
                << " global, which pointer is " << ptr;
      } else {  // Create temporary variables in the local scope.
        auto *ptr = scope->Var(var->Name());
        InitializeVariable(ptr, var->GetType());
        VLOG(3) << "Create Variable " << var->Name()
                << " locally, which pointer is " << ptr;
      }
    }
  } else {
    for (auto &var : global_block.AllVars()) {
      auto *ptr = scope->Var(var->Name());
      InitializeVariable(ptr, var->GetType());
      VLOG(3) << "Create variable " << var->Name() << ", which pointer is "
              << ptr;
    }
  }
}

void NaiveExecutor::CreateOps(const ProgramDesc &desc, int block_id,
                              bool with_feed_fetch_ops) {
  for (const auto &op_desc : desc.Block(block_id).AllOps()) {
    if (!with_feed_fetch_ops &&
        (op_desc->Type() == "feed" || op_desc->Type() == "fetch")) {
      string::PrettyLogEndl(string::Style::detail(), "--- skip [%s], %s -> %s",
                            op_desc->Input("X")[0], op_desc->Type(),
                            op_desc->Output("Out")[0]);
      continue;
    }
    ops_.emplace_back(OpRegistry::CreateOp(*op_desc));
  }
}

LoDTensor *NaiveExecutor::FindTensor(const std::string &name) {
  PADDLE_ENFORCE(scope_, "Need to init scope first");
  auto *var = scope_->FindVar(name);
  PADDLE_ENFORCE(var, "No variable [%s] in the scope", name);
  auto *tensor = const_cast<LoDTensor *>(&var->Get<LoDTensor>());
  return tensor;
}

void NaiveExecutor::CleanFeedFetchOps() {
  std::vector<std::unique_ptr<OperatorBase>> ops;
  for (auto &op : ops_) {
    if (op->Type() != "feed" && op->Type() != "fetch") {
      ops.emplace_back(std::move(op));
    }
  }
  ops_.swap(ops);
}

}  // namespace framework
}  // namespace paddle
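
CreateVariables splits variables between two scopes: it walks up to the root scope and creates persistable variables (parameters) there, while temporaries land in the executor's own child scope, so that, e.g., several executors prepared from the same parent scope can share one set of weights. Below is a minimal, self-contained sketch of that layout; Scope, Var, DeclareVar, and FindVar here are simplified stand-ins, not Paddle's actual API.

#include <map>
#include <memory>
#include <string>
#include <vector>

struct Var {};

// Simplified stand-in for framework::Scope: a tree of variable maps.
struct Scope {
  Scope* parent = nullptr;
  std::map<std::string, std::unique_ptr<Var>> vars;
  std::vector<std::unique_ptr<Scope>> children;

  Scope& NewScope() {
    children.push_back(std::make_unique<Scope>());
    children.back()->parent = this;
    return *children.back();
  }
  // Create the variable in this scope if it does not exist yet.
  Var* DeclareVar(const std::string& name) {
    auto& slot = vars[name];
    if (!slot) slot = std::make_unique<Var>();
    return slot.get();
  }
  // Lookup walks toward the root, so a child scope sees root variables.
  Var* FindVar(const std::string& name) {
    auto it = vars.find(name);
    if (it != vars.end()) return it->second.get();
    return parent ? parent->FindVar(name) : nullptr;
  }
};

int main() {
  Scope root;                      // persistable vars (weights) live here
  Scope& local = root.NewScope();  // per-executor scope for temporaries
  root.DeclareVar("fc.w_0");       // shared across executors
  local.DeclareVar("tmp_0");       // private to this executor
  return local.FindVar("fc.w_0") != nullptr ? 0 : 1;  // found via parent chain
}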
paddle/fluid/framework/naive_executor.h (new file)

Lines changed: 63 additions & 0 deletions

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h"

namespace paddle {
namespace framework {

/*
 * Simple, intuitive, and effective. Only a single thread is supported,
 * and it is currently designed for inference.
 */
class NaiveExecutor {
 public:
  explicit NaiveExecutor(const platform::Place& place) : place_(place) {}

  // Create a child scope and the variables.
  // @with_feed_fetch_ops: whether to keep the feed and fetch operators.
  void Prepare(Scope* parent_scope, const ProgramDesc& program_desc,
               int block_id, bool with_feed_fetch_ops);

  // Run all the operators.
  void Run();

  // Get a tensor to operate on directly, without the need for feed_ops.
  LoDTensor* FindTensor(const std::string& name);

  Scope* scope() { return scope_; }

  void CleanFeedFetchOps();

 protected:
  void CreateVariables(const ProgramDesc& desc, Scope* scope, int block_id);

  void CreateOps(const ProgramDesc& desc, int block_id,
                 bool with_feed_fetch_ops);

 private:
  const platform::Place place_;
  // Cache the required resources to avoid recreating them.
  std::vector<std::unique_ptr<OperatorBase>> ops_;
  Scope* scope_;
};

}  // namespace framework
}  // namespace paddle
paddle/fluid/framework/naive_executor_test.cc (new file)

Lines changed: 70 additions & 0 deletions

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/naive_executor.h"
#include <gtest/gtest.h>
#include <algorithm>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/program_desc.h"

namespace paddle {
namespace framework {

TEST(NaiveExecutor, Basic) {
  ProgramDesc program;
  auto* main_block = program.MutableBlock(0);
  auto* a = main_block->Var("a");  // input
  auto* b = main_block->Var("b");  // input
  auto* c = main_block->Var("c");  // output
  a->SetType(proto::VarType::LOD_TENSOR);
  b->SetType(proto::VarType::LOD_TENSOR);
  c->SetType(proto::VarType::LOD_TENSOR);

  auto* add = main_block->AppendOp();
  add->SetType("elementwise_add");
  add->SetInput("X", {"a"});
  add->SetInput("Y", {"b"});
  add->SetOutput("Out", {"c"});

  auto place = platform::CPUPlace();
  NaiveExecutor exe(place);
  exe.Prepare(nullptr, program, 0, false /*with_feed_fetch_ops*/);
  auto* a_tensor = exe.FindTensor("a");
  auto* b_tensor = exe.FindTensor("b");
  auto* c_tensor = exe.FindTensor("c");

  a_tensor->Resize({1, 4});
  b_tensor->Resize({1, 4});
  c_tensor->Resize({1, 4});
  b_tensor->mutable_data<float>(place);
  a_tensor->mutable_data<float>(place);

  float a_arr[] = {0, 1, 2, 3};
  float b_arr[] = {0.0, .1, .2, .3};

  std::copy_n(a_arr, 4, a_tensor->mutable_data<float>(place));
  std::copy_n(b_arr, 4, b_tensor->mutable_data<float>(place));

  exe.Run();

  auto* c_data = c_tensor->mutable_data<float>(place);
  for (int i = 0; i < 4; i++) {
    EXPECT_NEAR(c_data[i], 1.1 * i, 1e-3);
  }
}

}  // namespace framework
}  // namespace paddle

USE_OP(elementwise_add);

paddle/fluid/framework/operator.cc

Lines changed: 8 additions & 2 deletions

@@ -154,9 +154,15 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
     platform::SetDeviceId(dev_id);
 #endif
   }
-  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
-  platform::RecordEvent record_event(Type(), pool.Get(place));
+
+  if (platform::IsProfileEnabled()) {
+    platform::DeviceContextPool& pool =
+        platform::DeviceContextPool::Instance();
+    platform::RecordEvent record_event(Type(), pool.Get(place));
+  }
+
   RunImpl(scope, place);
+
   if (VLOG_IS_ON(3)) {
     VLOG(3) << place << " " << DebugStringEx(&scope);
   }
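
Note that RecordEvent is an RAII-style timer, so guarding it with the if also narrows its lifetime: the event ends at the closing brace of the if-block, before RunImpl executes. The following standalone sketch illustrates that scoping behavior, with a hypothetical ScopedTimer standing in for platform::RecordEvent; to time RunImpl itself, the timer would have to live in the enclosing scope.

#include <chrono>
#include <cstdio>
#include <string>
#include <utility>

// Hypothetical stand-in for platform::RecordEvent: starts timing on
// construction, stops and reports on destruction.
struct ScopedTimer {
  std::string name;
  std::chrono::steady_clock::time_point start;
  explicit ScopedTimer(std::string n)
      : name(std::move(n)), start(std::chrono::steady_clock::now()) {}
  ~ScopedTimer() {
    auto us = std::chrono::duration_cast<std::chrono::microseconds>(
                  std::chrono::steady_clock::now() - start)
                  .count();
    std::printf("%s: %lld us\n", name.c_str(), static_cast<long long>(us));
  }
};

bool profiling_enabled() { return true; }  // stand-in for IsProfileEnabled()

void RunOp() { /* the operator's actual work would happen here */ }

int main() {
  if (profiling_enabled()) {
    ScopedTimer timer("op");
    // timer is destroyed here, at the end of the if-block ...
  }
  RunOp();  // ... so the work below runs outside the timed region.
}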
