Skip to content

Commit 7cd2417

Browse files
committed
Merge branch 'develop' into cpu-for-1.1-merge-with-shape
test=develop
2 parents 06ffbc4 + 0a80f06 commit 7cd2417

32 files changed

+578
-147
lines changed

CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ option(WITH_ANAKIN "Compile with Anakin library" OFF)
6969
option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE})
7070
option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocol" OFF)
7171
option(WITH_INFERENCE "Compile fluid inference library" ON)
72+
option(ON_INFER "Turn on inference optimization." OFF)
7273
option(WITH_INFERENCE_API_TEST "Test fluid inference high-level api interface" OFF)
7374
option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
7475
option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION})
@@ -179,6 +180,7 @@ include(external/eigen) # download eigen3
179180
include(external/pybind11) # download pybind11
180181
include(external/cares)
181182
include(external/cub)
183+
include(external/xxhash) # download xxhash
182184

183185
if (NOT WIN32)
184186
# there is no official support of snappystream, warpctc, nccl, cupti in windows
@@ -301,3 +303,8 @@ if(WITH_DOC)
301303
find_python_module(recommonmark REQUIRED)
302304
add_subdirectory(doc)
303305
endif()
306+
307+
if (ON_INFER)
308+
message(WARNING "In inference mode, some specific optimizations will take place.")
309+
add_definitions(-DPADDLE_ON_INFERENCE)
310+
endif()

cmake/external/xxhash.cmake

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
INCLUDE(ExternalProject)
2+
3+
set(XXHASH_SOURCE_DIR ${THIRD_PARTY_PATH}/xxhash)
4+
set(XXHASH_INSTALL_DIR ${THIRD_PARTY_PATH}/install/xxhash)
5+
set(XXHASH_INCLUDE_DIR "${XXHASH_INSTALL_DIR}/include")
6+
7+
IF(WITH_STATIC_LIB)
8+
SET(BUILD_CMD make lib)
9+
ELSE()
10+
SET(BUILD_CMD sed -i "s/-Wstrict-prototypes -Wundef/-Wstrict-prototypes -Wundef -fPIC/g" ${XXHASH_SOURCE_DIR}/src/extern_xxhash/Makefile && make lib)
11+
ENDIF()
12+
13+
ExternalProject_Add(
14+
extern_xxhash
15+
${EXTERNAL_PROJECT_LOG_ARGS}
16+
GIT_REPOSITORY "https://github.com/Cyan4973/xxHash"
17+
GIT_TAG "v0.6.5"
18+
PREFIX ${XXHASH_SOURCE_DIR}
19+
DOWNLOAD_NAME "xxhash"
20+
UPDATE_COMMAND ""
21+
CONFIGURE_COMMAND ""
22+
BUILD_IN_SOURCE 1
23+
PATCH_COMMAND
24+
BUILD_COMMAND ${BUILD_CMD}
25+
INSTALL_COMMAND export PREFIX=${XXHASH_INSTALL_DIR}/ && make install
26+
TEST_COMMAND ""
27+
)
28+
29+
set(XXHASH_LIBRARIES "${XXHASH_INSTALL_DIR}/lib/libxxhash.a")
30+
INCLUDE_DIRECTORIES(${XXHASH_INCLUDE_DIR})
31+
32+
add_library(xxhash STATIC IMPORTED GLOBAL)
33+
set_property(TARGET xxhash PROPERTY IMPORTED_LOCATION ${XXHASH_LIBRARIES})
34+
include_directories(${XXHASH_INCLUDE_DIR})
35+
add_dependencies(xxhash extern_xxhash)
36+
37+
LIST(APPEND external_project_dependencies xxhash)
38+
39+
IF(WITH_C_API)
40+
INSTALL(DIRECTORY ${XXHASH_INCLUDE_DIR} DESTINATION third_party/xxhash)
41+
IF(ANDROID)
42+
INSTALL(FILES ${XXHASH_LIBRARIES} DESTINATION third_party/xxhash/lib/${ANDROID_ABI})
43+
ELSE()
44+
INSTALL(FILES ${XXHASH_LIBRARIES} DESTINATION third_party/xxhash/lib)
45+
ENDIF()
46+
ENDIF()

cmake/inference_lib.cmake

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414

1515
# make package for paddle fluid shared and static library
1616
function(copy TARGET)
17+
if (NOT ON_INFER)
18+
message(WARNING "Turn on the ON_INFER flag when building inference_lib only.")
19+
endif()
1720
set(options "")
1821
set(oneValueArgs "")
1922
set(multiValueArgs SRCS DSTS DEPS)
@@ -31,7 +34,7 @@ function(copy TARGET)
3134
foreach(index RANGE ${len})
3235
list(GET copy_lib_SRCS ${index} src)
3336
list(GET copy_lib_DSTS ${index} dst)
34-
add_custom_command(TARGET ${TARGET} PRE_BUILD
37+
add_custom_command(TARGET ${TARGET} PRE_BUILD
3538
COMMAND mkdir -p "${dst}"
3639
COMMAND cp -r "${src}" "${dst}"
3740
COMMENT "copying ${src} -> ${dst}")
@@ -67,6 +70,13 @@ copy(boost_lib
6770
DEPS boost
6871
)
6972

73+
set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/xxhash")
74+
copy(xxhash_lib
75+
SRCS ${XXHASH_INCLUDE_DIR} ${XXHASH_LIBRARIES}
76+
DSTS ${dst_dir} ${dst_dir}/lib
77+
DEPS xxhash
78+
)
79+
7080
if(NOT PROTOBUF_FOUND)
7181
set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/protobuf")
7282
copy(protobuf_lib
@@ -186,7 +196,7 @@ copy(cmake_cache
186196
DSTS ${FLUID_INSTALL_DIR})
187197

188198
# This command generates a complete fluid library for both train and inference
189-
add_custom_target(fluid_lib_dist DEPENDS ${fluid_lib_dist_dep})
199+
add_custom_target(fluid_lib_dist DEPENDS ${fluid_lib_dist_dep})
190200

191201
# Following commands generate an inference-only fluid library
192202
# third_party, version.txt and CMakeCache.txt are the same position with ${FLUID_INSTALL_DIR}

paddle/fluid/API.spec

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label'
176176
paddle.fluid.layers.maxout ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,))
177177
paddle.fluid.layers.sequence_reverse ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
178178
paddle.fluid.layers.affine_channel ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None))
179+
paddle.fluid.layers.hash ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None))
179180
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
180181
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
181182
paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)

paddle/fluid/framework/ir/graph.cc

Lines changed: 0 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -24,74 +24,6 @@ namespace paddle {
2424
namespace framework {
2525
namespace ir {
2626

27-
std::vector<std::string> FindDistTrainSendVars(
28-
const std::vector<ir::Node *> &nodes) {
29-
std::vector<std::string> send_vars;
30-
// since parameters are all in block 0,
31-
// it's enough to only scan send ops in block 0
32-
for (auto &node : nodes) {
33-
auto op_vars = node->Op()->InputArgumentNames();
34-
send_vars.reserve(send_vars.size() +
35-
std::distance(op_vars.begin(), op_vars.end()));
36-
send_vars.insert(send_vars.end(), op_vars.begin(), op_vars.end());
37-
}
38-
return send_vars;
39-
}
40-
41-
std::vector<std::string> FindDistTrainRecvVars(
42-
const std::vector<ir::Node *> &nodes) {
43-
std::vector<std::string> recv_vars;
44-
for (auto &node : nodes) {
45-
auto op_vars = node->Op()->OutputArgumentNames();
46-
recv_vars.reserve(recv_vars.size() +
47-
std::distance(op_vars.begin(), op_vars.end()));
48-
recv_vars.insert(recv_vars.end(), op_vars.begin(), op_vars.end());
49-
}
50-
return recv_vars;
51-
}
52-
53-
bool IsDistTrainOp(ir::Node *node, const std::vector<std::string> &send_vars,
54-
const std::vector<std::string> &recv_vars) {
55-
if (send_vars.size() == 0 || recv_vars.size() == 0) {
56-
return false;
57-
}
58-
59-
/**
60-
* Check any of opvars contains `.block` and in sendvars
61-
*/
62-
auto checker = [](const std::vector<std::string> &opvars,
63-
const std::vector<std::string> &rpc_vars) -> bool {
64-
for (auto &var : opvars) {
65-
// a variable name with the suffix `.block` means it's a splited
66-
// variable by (DistributeTranspiler)
67-
// [python/paddle/fluid/transpiler/distribute_transpiler.py]
68-
if (var.find(".block") != std::string::npos &&
69-
std::find(rpc_vars.begin(), rpc_vars.end(), var) != rpc_vars.end()) {
70-
return true;
71-
}
72-
73-
if (!(var.find(".block") == std::string::npos &&
74-
var.find(".pserver") == std::string::npos) &&
75-
std::find(rpc_vars.begin(), rpc_vars.end(), var) != rpc_vars.end()) {
76-
return true;
77-
}
78-
}
79-
return false;
80-
};
81-
82-
std::vector<std::string> input_var_names;
83-
std::vector<std::string> output_var_names;
84-
for (ir::Node *input : node->inputs) {
85-
input_var_names.push_back(input->Name());
86-
}
87-
for (ir::Node *output : node->outputs) {
88-
output_var_names.push_back(output->Name());
89-
}
90-
91-
return checker(output_var_names, send_vars) ||
92-
checker(input_var_names, recv_vars);
93-
}
94-
9527
Graph::Graph(const ProgramDesc &program) : program_(program) {
9628
// Make the nodes id start from 0.
9729
Node::ResetId();

paddle/fluid/framework/ir/node.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ class Node {
4444
return op_desc_.get();
4545
}
4646

47+
// Please don't use this API!
4748
int id() const { return id_; }
4849

4950
bool IsOp() const { return type_ == Type::kOperation; }
@@ -92,6 +93,7 @@ class Node {
9293
Node() = delete;
9394

9495
static int count_;
96+
// Please don't use this API or make this public.
9597
static void ResetId() { count_ = 0; }
9698
DISABLE_COPY_AND_ASSIGN(Node);
9799
};

paddle/fluid/framework/lod_tensor_array.h

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,82 @@ limitations under the License. */
1818

1919
namespace paddle {
2020
namespace framework {
21+
22+
// NOTE The vector<LoDTensor> can't be replaced with the class LoDTensorArray
23+
// directly, because there are many vector<LoDTensor> used across the project,
24+
// and some of them are treated as LoDTensorArray.
25+
#if !defined(PADDLE_ON_INFERENCE)
26+
2127
using LoDTensorArray = std::vector<LoDTensor>;
22-
}
28+
29+
#else // !PADDLE_ON_INFERENCE
30+
31+
#pragma message "LoDTensorArray is replaced with the inference one."
32+
/*
33+
* A LoDTensorArray which will not deallocate buffer when resized, fix the data
34+
* diff in inference, and more performance friendly in the concurrency
35+
* scenarios.
36+
*/
37+
class LoDTensorArray {
38+
public:
39+
LoDTensorArray() = default;
40+
41+
using iterator = std::vector<LoDTensor>::iterator;
42+
using const_iterator = std::vector<LoDTensor>::const_iterator;
43+
44+
const_iterator begin() const { return array_.begin(); }
45+
const_iterator end() const { return array_.begin() + size_; }
46+
iterator begin() { return array_.begin(); }
47+
iterator end() { return array_.begin() + size_; }
48+
49+
void push_back(const LoDTensor& x) {
50+
if (size_ < array_.size()) {
51+
array_[size_++] = x;
52+
} else {
53+
array_.push_back(x);
54+
++size_;
55+
}
56+
}
57+
void resize(size_t size) {
58+
if (array_.size() < size) {
59+
array_.resize(size);
60+
}
61+
size_ = size;
62+
}
63+
64+
void emplace_back() { array_.emplace_back(); }
65+
66+
void emplace_back(LoDTensor&& x) { array_.emplace_back(std::move(x)); }
67+
68+
LoDTensor& back() { return array_.back(); }
69+
70+
size_t space() const { return array_.size(); }
71+
72+
void reserve(size_t size) {
73+
// Naive warning to tell user this array might be to large. The memory and
74+
// buffer used by this TensorArray will not be deleted during the training
75+
// and inference phase, so attention not to make it expand too long.
76+
if (size > 800UL) {
77+
LOG(WARNING) << "TensorArray has more than 800 items";
78+
}
79+
array_.reserve(size);
80+
}
81+
82+
bool empty() const { return size_ == 0UL; }
83+
void clear() { size_ = 0UL; }
84+
85+
LoDTensor& operator[](size_t id) { return array_[id]; }
86+
const LoDTensor& operator[](size_t id) const { return array_[id]; }
87+
LoDTensor& at(size_t id) { return array_.at(id); }
88+
const LoDTensor& at(size_t id) const { return array_.at(id); }
89+
90+
size_t size() const { return size_; }
91+
92+
private:
93+
size_t size_{0};
94+
std::vector<LoDTensor> array_;
95+
};
96+
#endif // !PADDLE_ON_INFERENCE
97+
98+
} // namespace framework
2399
} // namespace paddle

paddle/fluid/framework/op_desc.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -121,10 +121,6 @@ class OpDesc {
121121

122122
BlockDesc *Block() { return this->block_; }
123123

124-
const BlockDesc &BlockRef() const { return *this->block_; }
125-
126-
void SetBlock(BlockDesc *block) { this->block_ = block; }
127-
128124
private:
129125
template <typename MapType>
130126
static std::vector<typename MapType::key_type> MapKeys(const MapType &map) {

paddle/fluid/framework/scope.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ class Scope {
7878
/// Drop all kids scopes belonged to this scope.
7979
void DropKids();
8080

81+
std::list<Scope*>& kids() const { return kids_; }
82+
8183
/// Find if a scope exists in the kid scopes
8284
bool HasKid(const Scope* scope) const;
8385

paddle/fluid/inference/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ if (WITH_GPU AND TENSORRT_FOUND)
3030
endif()
3131

3232
# Create static library
33-
cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor)
33+
cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array)
3434

3535
if(NOT APPLE)
3636
# TODO(liuyiqu): Temporarily disable the link flag because it is not supported on Mac.
@@ -40,7 +40,7 @@ endif()
4040

4141
# Create shared library
4242
cc_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS}
43-
DEPS ${fluid_modules} paddle_fluid_api)
43+
DEPS ${fluid_modules} paddle_fluid_api reset_tensor_array)
4444

4545
set_target_properties(paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid)
4646
if(NOT APPLE)

0 commit comments

Comments
 (0)